1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(VecDestroy(&aij->diag)); 17 PetscCall(MatDestroy(&aij->A)); 18 PetscCall(MatDestroy(&aij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&aij->colmap)); 21 #else 22 PetscCall(PetscFree(aij->colmap)); 23 #endif 24 PetscCall(PetscFree(aij->garray)); 25 PetscCall(VecDestroy(&aij->lvec)); 26 PetscCall(VecScatterDestroy(&aij->Mvctx)); 27 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 28 PetscCall(PetscFree(aij->ld)); 29 30 PetscCall(PetscFree(mat->data)); 31 32 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 33 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 34 35 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 45 #if defined(PETSC_HAVE_CUDA) 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 47 #endif 48 #if defined(PETSC_HAVE_HIP) 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 50 #endif 51 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 53 #endif 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 55 #if defined(PETSC_HAVE_ELEMENTAL) 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 57 #endif 58 #if defined(PETSC_HAVE_SCALAPACK) 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 60 #endif 61 #if defined(PETSC_HAVE_HYPRE) 62 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 63 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 64 #endif 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 68 
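  /* Composing NULL under a name removes the function previously composed under that name, so the
     type-specific callbacks registered at creation time do not outlive the data they reference */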
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 71 #if defined(PETSC_HAVE_MKL_SPARSE) 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 73 #endif 74 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 83 #define TYPE AIJ 84 #define TYPE_AIJ 85 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 86 #undef TYPE 87 #undef TYPE_AIJ 88 89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 96 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 107 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 108 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /*MC 113 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 114 115 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 116 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 117 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 118 for communicators controlling multiple processes. It is recommended that you call both of 119 the above preallocation routines for simplicity. 120 121 Options Database Key: 122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 123 124 Developer Note: 125 Level: beginner 126 127 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 128 enough exist. 129 130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 131 M*/ 132 133 /*MC 134 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 135 136 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 137 and `MATMPIAIJCRL` otherwise. 
As a result, for single process communicators, 138 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 139 for communicators controlling multiple processes. It is recommended that you call both of 140 the above preallocation routines for simplicity. 141 142 Options Database Key: 143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 144 145 Level: beginner 146 147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 148 M*/ 149 150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 151 { 152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 153 154 PetscFunctionBegin; 155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 156 A->boundtocpu = flg; 157 #endif 158 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 159 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 160 161 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 162 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 163 * to differ from the parent matrix. */ 164 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 165 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 170 { 171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 172 173 PetscFunctionBegin; 174 if (mat->A) { 175 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 176 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 182 { 183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 185 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 186 const PetscInt *ia, *ib; 187 const MatScalar *aa, *bb, *aav, *bav; 188 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 189 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 190 191 PetscFunctionBegin; 192 *keptrows = NULL; 193 194 ia = a->i; 195 ib = b->i; 196 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 197 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 198 for (i = 0; i < m; i++) { 199 na = ia[i + 1] - ia[i]; 200 nb = ib[i + 1] - ib[i]; 201 if (!na && !nb) { 202 cnt++; 203 goto ok1; 204 } 205 aa = aav + ia[i]; 206 for (j = 0; j < na; j++) { 207 if (aa[j] != 0.0) goto ok1; 208 } 209 bb = PetscSafePointerPlusOffset(bav, ib[i]); 210 for (j = 0; j < nb; j++) { 211 if (bb[j] != 0.0) goto ok1; 212 } 213 cnt++; 214 ok1:; 215 } 216 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 217 if (!n0rows) { 218 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 223 cnt = 0; 224 for (i = 0; i < m; i++) { 225 na = ia[i + 1] - ia[i]; 226 nb = ib[i + 1] - ib[i]; 227 if (!na && !nb) continue; 228 aa = aav + ia[i]; 229 for (j = 0; j < na; j++) { 230 if (aa[j] != 0.0) { 231 rows[cnt++] = rstart + i; 232 goto ok2; 233 } 234 } 235 bb = PetscSafePointerPlusOffset(bav, ib[i]); 236 for (j = 0; j < nb; j++) { 237 if (bb[j] != 0.0) { 238 rows[cnt++] = rstart + i; 239 goto ok2; 240 } 241 } 242 ok2:; 243 } 244 
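  /* The pass above collected the global indices of the locally owned rows that contain at least one
     stored nonzero; wrap them in an index set, handing ownership of the array to the IS */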
PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 245 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 246 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 251 { 252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 253 PetscBool cong; 254 255 PetscFunctionBegin; 256 PetscCall(MatHasCongruentLayouts(Y, &cong)); 257 if (Y->assembled && cong) { 258 PetscCall(MatDiagonalSet(aij->A, D, is)); 259 } else { 260 PetscCall(MatDiagonalSet_Default(Y, D, is)); 261 } 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 266 { 267 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 268 PetscInt i, rstart, nrows, *rows; 269 270 PetscFunctionBegin; 271 *zrows = NULL; 272 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 273 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 274 for (i = 0; i < nrows; i++) rows[i] += rstart; 275 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 280 { 281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 282 PetscInt i, m, n, *garray = aij->garray; 283 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 284 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 285 PetscReal *work; 286 const PetscScalar *dummy; 287 PetscMPIInt in; 288 289 PetscFunctionBegin; 290 PetscCall(MatGetSize(A, &m, &n)); 291 PetscCall(PetscCalloc1(n, &work)); 292 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 293 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 294 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 295 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 296 if (type == NORM_2) { 297 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 298 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 299 } else if (type == NORM_1) { 300 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 301 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 302 } else if (type == NORM_INFINITY) { 303 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 304 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 305 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 306 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 307 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 308 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 309 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 310 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 311 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown 
reduction type"); 312 PetscCall(PetscMPIIntCast(n, &in)); 313 if (type == NORM_INFINITY) { 314 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 315 } else { 316 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 317 } 318 PetscCall(PetscFree(work)); 319 if (type == NORM_2) { 320 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 321 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 322 for (i = 0; i < n; i++) reductions[i] /= m; 323 } 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 328 { 329 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 330 IS sis, gis; 331 const PetscInt *isis, *igis; 332 PetscInt n, *iis, nsis, ngis, rstart, i; 333 334 PetscFunctionBegin; 335 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 336 PetscCall(MatFindNonzeroRows(a->B, &gis)); 337 PetscCall(ISGetSize(gis, &ngis)); 338 PetscCall(ISGetSize(sis, &nsis)); 339 PetscCall(ISGetIndices(sis, &isis)); 340 PetscCall(ISGetIndices(gis, &igis)); 341 342 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 343 PetscCall(PetscArraycpy(iis, igis, ngis)); 344 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 345 n = ngis + nsis; 346 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 347 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 348 for (i = 0; i < n; i++) iis[i] += rstart; 349 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 350 351 PetscCall(ISRestoreIndices(sis, &isis)); 352 PetscCall(ISRestoreIndices(gis, &igis)); 353 PetscCall(ISDestroy(&sis)); 354 PetscCall(ISDestroy(&gis)); 355 PetscFunctionReturn(PETSC_SUCCESS); 356 } 357 358 /* 359 Local utility routine that creates a mapping from the global column 360 number to the local number in the off-diagonal part of the local 361 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 362 a slightly higher hash table cost; without it it is not scalable (each processor 363 has an order N integer array but is fast to access. 
364 */ 365 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 366 { 367 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 368 PetscInt n = aij->B->cmap->n, i; 369 370 PetscFunctionBegin; 371 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 372 #if defined(PETSC_USE_CTABLE) 373 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 374 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 375 #else 376 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 377 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 378 #endif 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 383 do { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col; \ 387 while (high1 - low1 > 5) { \ 388 t = (low1 + high1) / 2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i = low1; _i < high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) { \ 396 ap1[_i] += value; \ 397 /* Not sure LogFlops will slow dow the code or not */ \ 398 (void)PetscLogFlops(1.0); \ 399 } else ap1[_i] = value; \ 400 goto a_noinsert; \ 401 } \ 402 } \ 403 if (value == 0.0 && ignorezeroentries && row != col) { \ 404 low1 = 0; \ 405 high1 = nrow1; \ 406 goto a_noinsert; \ 407 } \ 408 if (nonew == 1) { \ 409 low1 = 0; \ 410 high1 = nrow1; \ 411 goto a_noinsert; \ 412 } \ 413 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 414 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 415 N = nrow1++ - 1; \ 416 a->nz++; \ 417 high1++; \ 418 /* shift up all the later entries in this row */ \ 419 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 420 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 421 rp1[_i] = col; \ 422 ap1[_i] = value; \ 423 a_noinsert:; \ 424 ailen[row] = nrow1; \ 425 } while (0) 426 427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 428 do { \ 429 if (col <= lastcol2) low2 = 0; \ 430 else high2 = nrow2; \ 431 lastcol2 = col; \ 432 while (high2 - low2 > 5) { \ 433 t = (low2 + high2) / 2; \ 434 if (rp2[t] > col) high2 = t; \ 435 else low2 = t; \ 436 } \ 437 for (_i = low2; _i < high2; _i++) { \ 438 if (rp2[_i] > col) break; \ 439 if (rp2[_i] == col) { \ 440 if (addv == ADD_VALUES) { \ 441 ap2[_i] += value; \ 442 (void)PetscLogFlops(1.0); \ 443 } else ap2[_i] = value; \ 444 goto b_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries) { \ 448 low2 = 0; \ 449 high2 = nrow2; \ 450 goto b_noinsert; \ 451 } \ 452 if (nonew == 1) { \ 453 low2 = 0; \ 454 high2 = nrow2; \ 455 goto b_noinsert; \ 456 } \ 457 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 458 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 459 N = nrow2++ - 1; \ 460 b->nz++; \ 461 high2++; \ 462 /* shift up all the later entries in this row */ \ 463 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 464 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 465 rp2[_i] = col; \ 466 ap2[_i] = value; \ 467 b_noinsert:; 
\ 468 bilen[row] = nrow2; \ 469 } while (0) 470 471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 472 { 473 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 474 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 475 PetscInt l, *garray = mat->garray, diag; 476 PetscScalar *aa, *ba; 477 478 PetscFunctionBegin; 479 /* code only works for square matrices A */ 480 481 /* find size of row to the left of the diagonal part */ 482 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 483 row = row - diag; 484 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 485 if (garray[b->j[b->i[row] + l]] > diag) break; 486 } 487 if (l) { 488 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 489 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 490 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 491 } 492 493 /* diagonal part */ 494 if (a->i[row + 1] - a->i[row]) { 495 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 496 PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 497 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 498 } 499 500 /* right of diagonal part */ 501 if (b->i[row + 1] - b->i[row] - l) { 502 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 503 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 504 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 505 } 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 510 { 511 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 512 PetscScalar value = 0.0; 513 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 514 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 515 PetscBool roworiented = aij->roworiented; 516 517 /* Some Variables required in the macro */ 518 Mat A = aij->A; 519 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 520 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 521 PetscBool ignorezeroentries = a->ignorezeroentries; 522 Mat B = aij->B; 523 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 524 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 525 MatScalar *aa, *ba; 526 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 527 PetscInt nonew; 528 MatScalar *ap1, *ap2; 529 530 PetscFunctionBegin; 531 PetscCall(MatSeqAIJGetArray(A, &aa)); 532 PetscCall(MatSeqAIJGetArray(B, &ba)); 533 for (i = 0; i < m; i++) { 534 if (im[i] < 0) continue; 535 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 536 if (im[i] >= rstart && im[i] < rend) { 537 row = im[i] - rstart; 538 lastcol1 = -1; 539 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 540 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 541 rmax1 = aimax[row]; 542 nrow1 = ailen[row]; 543 low1 = 0; 544 high1 = nrow1; 545 lastcol2 = -1; 546 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 547 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 548 rmax2 = bimax[row]; 549 nrow2 = bilen[row]; 550 low2 = 0; 551 high2 = nrow2; 552 553 for (j = 0; j < n; j++) { 554 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
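  Illustrative example (sizes assumed, not taken from the surrounding code): on a process owning
  columns [cstart, cend) = [4, 8), a row with mat_j = {1, 5, 6, 9} is split so that columns 5 and 6
  go to the diagonal block (stored as local columns 1 and 2) while columns 1 and 9 go to the
  off-diagonal block, still in global numbering at this stage.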
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
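     (full_diag_i and full_offd_i are the preallocated row-start offsets a->i and b->i of the diagonal and off-diagonal SeqAIJ blocks.)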
*/ 678 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 679 PetscScalar *aa = a->a, *ba = b->a; 680 681 PetscFunctionBegin; 682 /* Iterate over all rows of the matrix */ 683 for (j = 0; j < am; j++) { 684 dnz_row = onz_row = 0; 685 rowstart_offd = full_offd_i[j]; 686 rowstart_diag = full_diag_i[j]; 687 /* Iterate over all non-zero columns of the current row */ 688 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 689 /* If column is in the diagonal */ 690 if (mat_j[col] >= cstart && mat_j[col] < cend) { 691 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 692 aa[rowstart_diag + dnz_row] = mat_a[col]; 693 dnz_row++; 694 } else { /* off-diagonal entries */ 695 bj[rowstart_offd + onz_row] = mat_j[col]; 696 ba[rowstart_offd + onz_row] = mat_a[col]; 697 onz_row++; 698 } 699 } 700 ailen[j] = dnz_row; 701 bilen[j] = onz_row; 702 } 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 709 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 710 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 711 712 PetscFunctionBegin; 713 for (i = 0; i < m; i++) { 714 if (idxm[i] < 0) continue; /* negative row */ 715 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 716 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 717 row = idxm[i] - rstart; 718 for (j = 0; j < n; j++) { 719 if (idxn[j] < 0) continue; /* negative column */ 720 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 721 if (idxn[j] >= cstart && idxn[j] < cend) { 722 col = idxn[j] - cstart; 723 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 724 } else { 725 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 726 #if defined(PETSC_USE_CTABLE) 727 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 728 col--; 729 #else 730 col = aij->colmap[idxn[j]] - 1; 731 #endif 732 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 733 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 734 } 735 } 736 } 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 743 PetscInt nstash, reallocs; 744 745 PetscFunctionBegin; 746 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 747 748 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 750 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 757 PetscMPIInt n; 758 PetscInt i, j, rstart, ncols, flg; 759 PetscInt *row, *col; 760 PetscBool other_disassembled; 761 PetscScalar *val; 762 763 /* do not use 'b 
= (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 764 765 PetscFunctionBegin; 766 if (!aij->donotstash && !mat->nooffprocentries) { 767 while (1) { 768 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 769 if (!flg) break; 770 771 for (i = 0; i < n;) { 772 /* Now identify the consecutive vals belonging to the same row */ 773 for (j = i, rstart = row[j]; j < n; j++) { 774 if (row[j] != rstart) break; 775 } 776 if (j < n) ncols = j - i; 777 else ncols = n - i; 778 /* Now assemble all these values with a single function call */ 779 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 780 i = j; 781 } 782 } 783 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 784 } 785 #if defined(PETSC_HAVE_DEVICE) 786 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 787 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 788 if (mat->boundtocpu) { 789 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 790 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 791 } 792 #endif 793 PetscCall(MatAssemblyBegin(aij->A, mode)); 794 PetscCall(MatAssemblyEnd(aij->A, mode)); 795 796 /* determine if any processor has disassembled, if so we must 797 also disassemble ourself, in order that we may reassemble. */ 798 /* 799 if nonzero structure of submatrix B cannot change then we know that 800 no processor disassembled thus we can skip this stuff 801 */ 802 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 803 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 804 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 805 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 806 } 807 } 808 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 809 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 810 #if defined(PETSC_HAVE_DEVICE) 811 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 812 #endif 813 PetscCall(MatAssemblyBegin(aij->B, mode)); 814 PetscCall(MatAssemblyEnd(aij->B, mode)); 815 816 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 817 818 aij->rowvalues = NULL; 819 820 PetscCall(VecDestroy(&aij->diag)); 821 822 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 823 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 824 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 825 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 826 } 827 #if defined(PETSC_HAVE_DEVICE) 828 mat->offloadmask = PETSC_OFFLOAD_BOTH; 829 #endif 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 834 { 835 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 836 837 PetscFunctionBegin; 838 PetscCall(MatZeroEntries(l->A)); 839 PetscCall(MatZeroEntries(l->B)); 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 846 PetscInt *lrows; 847 PetscInt r, len; 848 
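  /* lrows/len will receive the locally owned subset of the requested rows, translated to local row indices */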
PetscBool cong; 849 850 PetscFunctionBegin; 851 /* get locally owned rows */ 852 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 853 PetscCall(MatHasCongruentLayouts(A, &cong)); 854 /* fix right-hand side if needed */ 855 if (x && b) { 856 const PetscScalar *xx; 857 PetscScalar *bb; 858 859 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 860 PetscCall(VecGetArrayRead(x, &xx)); 861 PetscCall(VecGetArray(b, &bb)); 862 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 863 PetscCall(VecRestoreArrayRead(x, &xx)); 864 PetscCall(VecRestoreArray(b, &bb)); 865 } 866 867 if (diag != 0.0 && cong) { 868 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 869 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 870 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 871 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 872 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 873 PetscInt nnwA, nnwB; 874 PetscBool nnzA, nnzB; 875 876 nnwA = aijA->nonew; 877 nnwB = aijB->nonew; 878 nnzA = aijA->keepnonzeropattern; 879 nnzB = aijB->keepnonzeropattern; 880 if (!nnzA) { 881 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 882 aijA->nonew = 0; 883 } 884 if (!nnzB) { 885 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 886 aijB->nonew = 0; 887 } 888 /* Must zero here before the next loop */ 889 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 890 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 891 for (r = 0; r < len; ++r) { 892 const PetscInt row = lrows[r] + A->rmap->rstart; 893 if (row >= A->cmap->N) continue; 894 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 895 } 896 aijA->nonew = nnwA; 897 aijB->nonew = nnwB; 898 } else { 899 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 900 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 901 } 902 PetscCall(PetscFree(lrows)); 903 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 904 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 905 906 /* only change matrix nonzero state if pattern was allowed to be changed */ 907 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 908 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 909 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 910 } 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 915 { 916 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 917 PetscInt n = A->rmap->n; 918 PetscInt i, j, r, m, len = 0; 919 PetscInt *lrows, *owners = A->rmap->range; 920 PetscMPIInt p = 0; 921 PetscSFNode *rrows; 922 PetscSF sf; 923 const PetscScalar *xx; 924 PetscScalar *bb, *mask, *aij_a; 925 Vec xmask, lmask; 926 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 927 const PetscInt *aj, *ii, *ridx; 928 PetscScalar *aa; 929 930 PetscFunctionBegin; 931 /* Create SF where leaves are input rows and roots are owned rows */ 932 PetscCall(PetscMalloc1(n, &lrows)); 933 for (r = 0; r < n; ++r) lrows[r] = -1; 934 PetscCall(PetscMalloc1(N, &rrows)); 935 for (r = 0; r < N; ++r) { 936 
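    /* record, for each requested global row, the owning rank and the row's local index on that rank;
       these (rank, index) pairs become the leaf-to-root graph of the star forest created below */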
const PetscInt idx = rows[r]; 937 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 938 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 939 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 940 } 941 rrows[r].rank = p; 942 rrows[r].index = rows[r] - owners[p]; 943 } 944 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 945 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 946 /* Collect flags for rows to be zeroed */ 947 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 948 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 949 PetscCall(PetscSFDestroy(&sf)); 950 /* Compress and put in row numbers */ 951 for (r = 0; r < n; ++r) 952 if (lrows[r] >= 0) lrows[len++] = r; 953 /* zero diagonal part of matrix */ 954 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 955 /* handle off-diagonal part of matrix */ 956 PetscCall(MatCreateVecs(A, &xmask, NULL)); 957 PetscCall(VecDuplicate(l->lvec, &lmask)); 958 PetscCall(VecGetArray(xmask, &bb)); 959 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 960 PetscCall(VecRestoreArray(xmask, &bb)); 961 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 962 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 963 PetscCall(VecDestroy(&xmask)); 964 if (x && b) { /* this code is buggy when the row and column layout don't match */ 965 PetscBool cong; 966 967 PetscCall(MatHasCongruentLayouts(A, &cong)); 968 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 969 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 970 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 971 PetscCall(VecGetArrayRead(l->lvec, &xx)); 972 PetscCall(VecGetArray(b, &bb)); 973 } 974 PetscCall(VecGetArray(lmask, &mask)); 975 /* remove zeroed rows of off-diagonal matrix */ 976 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 977 ii = aij->i; 978 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 979 /* loop over all elements of off process part of matrix zeroing removed columns*/ 980 if (aij->compressedrow.use) { 981 m = aij->compressedrow.nrows; 982 ii = aij->compressedrow.i; 983 ridx = aij->compressedrow.rindex; 984 for (i = 0; i < m; i++) { 985 n = ii[i + 1] - ii[i]; 986 aj = aij->j + ii[i]; 987 aa = aij_a + ii[i]; 988 989 for (j = 0; j < n; j++) { 990 if (PetscAbsScalar(mask[*aj])) { 991 if (b) bb[*ridx] -= *aa * xx[*aj]; 992 *aa = 0.0; 993 } 994 aa++; 995 aj++; 996 } 997 ridx++; 998 } 999 } else { /* do not use compressed row format */ 1000 m = l->B->rmap->n; 1001 for (i = 0; i < m; i++) { 1002 n = ii[i + 1] - ii[i]; 1003 aj = aij->j + ii[i]; 1004 aa = aij_a + ii[i]; 1005 for (j = 0; j < n; j++) { 1006 if (PetscAbsScalar(mask[*aj])) { 1007 if (b) bb[i] -= *aa * xx[*aj]; 1008 *aa = 0.0; 1009 } 1010 aa++; 1011 aj++; 1012 } 1013 } 1014 } 1015 if (x && b) { 1016 PetscCall(VecRestoreArray(b, &bb)); 1017 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1018 } 1019 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1020 PetscCall(VecRestoreArray(lmask, &mask)); 1021 PetscCall(VecDestroy(&lmask)); 1022 PetscCall(PetscFree(lrows)); 1023 1024 /* only change matrix nonzero state if 
pattern was allowed to be changed */ 1025 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1026 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1027 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1028 } 1029 PetscFunctionReturn(PETSC_SUCCESS); 1030 } 1031 1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1033 { 1034 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1035 PetscInt nt; 1036 VecScatter Mvctx = a->Mvctx; 1037 1038 PetscFunctionBegin; 1039 PetscCall(VecGetLocalSize(xx, &nt)); 1040 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1041 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->A, mult, xx, yy); 1043 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1044 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } 1047 1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1049 { 1050 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1051 1052 PetscFunctionBegin; 1053 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1058 { 1059 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1060 VecScatter Mvctx = a->Mvctx; 1061 1062 PetscFunctionBegin; 1063 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1065 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1071 { 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073 1074 PetscFunctionBegin; 1075 /* do nondiagonal part */ 1076 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1077 /* do local part */ 1078 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1079 /* add partial results together */ 1080 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1081 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1082 PetscFunctionReturn(PETSC_SUCCESS); 1083 } 1084 1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1086 { 1087 MPI_Comm comm; 1088 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1089 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1090 IS Me, Notme; 1091 PetscInt M, N, first, last, *notme, i; 1092 PetscBool lf; 1093 PetscMPIInt size; 1094 1095 PetscFunctionBegin; 1096 /* Easy test: symmetric diagonal block */ 1097 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1098 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1099 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1100 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1101 PetscCallMPI(MPI_Comm_size(comm, &size)); 1102 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1103 1104 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
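     Each process compares the block formed by its owned rows and all non-owned columns, Amat(Me, Notme),
     against the corresponding block Bmat(Notme, Me) gathered from the other processes.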
*/ 1105 PetscCall(MatGetSize(Amat, &M, &N)); 1106 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1107 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1108 for (i = 0; i < first; i++) notme[i] = i; 1109 for (i = last; i < M; i++) notme[i - last + first] = i; 1110 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1111 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1112 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1113 Aoff = Aoffs[0]; 1114 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1115 Boff = Boffs[0]; 1116 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1117 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1118 PetscCall(MatDestroyMatrices(1, &Boffs)); 1119 PetscCall(ISDestroy(&Me)); 1120 PetscCall(ISDestroy(&Notme)); 1121 PetscCall(PetscFree(notme)); 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1128 1129 PetscFunctionBegin; 1130 /* do nondiagonal part */ 1131 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1132 /* do local part */ 1133 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1134 /* add partial results together */ 1135 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1136 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1137 PetscFunctionReturn(PETSC_SUCCESS); 1138 } 1139 1140 /* 1141 This only works correctly for square matrices where the subblock A->A is the 1142 diagonal block 1143 */ 1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1145 { 1146 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1147 1148 PetscFunctionBegin; 1149 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1150 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1151 PetscCall(MatGetDiagonal(a->A, v)); 1152 PetscFunctionReturn(PETSC_SUCCESS); 1153 } 1154 1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1156 { 1157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1158 1159 PetscFunctionBegin; 1160 PetscCall(MatScale(a->A, aa)); 1161 PetscCall(MatScale(a->B, aa)); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1166 { 1167 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1168 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1169 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1170 const PetscInt *garray = aij->garray; 1171 const PetscScalar *aa, *ba; 1172 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1173 PetscInt64 nz, hnz; 1174 PetscInt *rowlens; 1175 PetscInt *colidxs; 1176 PetscScalar *matvals; 1177 PetscMPIInt rank; 1178 1179 PetscFunctionBegin; 1180 PetscCall(PetscViewerSetUp(viewer)); 1181 1182 M = mat->rmap->N; 1183 N = mat->cmap->N; 1184 m = mat->rmap->n; 1185 rs = mat->rmap->rstart; 1186 cs = mat->cmap->rstart; 1187 nz = A->nz + B->nz; 1188 1189 /* write matrix header */ 1190 header[0] = MAT_FILE_CLASSID; 1191 header[1] = M; 1192 header[2] = N; 1193 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1194 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1195 if (rank == 0) { 1196 if 
(hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX; 1197 else header[3] = (PetscInt)hnz; 1198 } 1199 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1200 1201 /* fill in and store row lengths */ 1202 PetscCall(PetscMalloc1(m, &rowlens)); 1203 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1204 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1205 PetscCall(PetscFree(rowlens)); 1206 1207 /* fill in and store column indices */ 1208 PetscCall(PetscMalloc1(nz, &colidxs)); 1209 for (cnt = 0, i = 0; i < m; i++) { 1210 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1211 if (garray[B->j[jb]] > cs) break; 1212 colidxs[cnt++] = garray[B->j[jb]]; 1213 } 1214 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1215 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1216 } 1217 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1218 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1219 PetscCall(PetscFree(colidxs)); 1220 1221 /* fill in and store nonzero values */ 1222 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1223 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1224 PetscCall(PetscMalloc1(nz, &matvals)); 1225 for (cnt = 0, i = 0; i < m; i++) { 1226 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 matvals[cnt++] = ba[jb]; 1229 } 1230 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1231 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1232 } 1233 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1234 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1235 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1236 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1237 PetscCall(PetscFree(matvals)); 1238 1239 /* write block size option to the viewer's .info file */ 1240 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 #include <petscdraw.h> 1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1246 { 1247 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1248 PetscMPIInt rank = aij->rank, size = aij->size; 1249 PetscBool isdraw, iascii, isbinary; 1250 PetscViewer sviewer; 1251 PetscViewerFormat format; 1252 1253 PetscFunctionBegin; 1254 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1255 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1256 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1257 if (iascii) { 1258 PetscCall(PetscViewerGetFormat(viewer, &format)); 1259 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1260 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1261 PetscCall(PetscMalloc1(size, &nz)); 1262 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1263 for (i = 0; i < (PetscInt)size; i++) { 1264 nmax = PetscMax(nmax, nz[i]); 1265 nmin = PetscMin(nmin, nz[i]); 1266 navg += nz[i]; 1267 } 1268 PetscCall(PetscFree(nz)); 1269 navg = navg / size; 1270 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - 
Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1271 PetscFunctionReturn(PETSC_SUCCESS); 1272 } 1273 PetscCall(PetscViewerGetFormat(viewer, &format)); 1274 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1275 MatInfo info; 1276 PetscInt *inodes = NULL; 1277 1278 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1279 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1280 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1281 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1282 if (!inodes) { 1283 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1284 (double)info.memory)); 1285 } else { 1286 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1287 (double)info.memory)); 1288 } 1289 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1291 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1292 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1293 PetscCall(PetscViewerFlush(viewer)); 1294 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1295 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1296 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1297 PetscFunctionReturn(PETSC_SUCCESS); 1298 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1299 PetscInt inodecount, inodelimit, *inodes; 1300 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1301 if (inodes) { 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1303 } else { 1304 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1305 } 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } 1310 } else if (isbinary) { 1311 if (size == 1) { 1312 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1313 PetscCall(MatView(aij->A, viewer)); 1314 } else { 1315 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (iascii && size == 1) { 1319 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1320 PetscCall(MatView(aij->A, viewer)); 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } else if (isdraw) { 1323 PetscDraw draw; 1324 PetscBool isnull; 1325 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1326 PetscCall(PetscDrawIsNull(draw, &isnull)); 1327 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 { /* assemble the entire matrix onto first processor */ 1331 Mat A = NULL, Av; 1332 IS isrow, iscol; 1333 1334 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1335 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1336 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1338 /* The commented code uses MatCreateSubMatrices instead */ 1339 /* 1340 Mat *AA, A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1345 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1346 if (rank == 0) { 1347 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1348 A = AA[0]; 1349 Av = AA[0]; 1350 } 1351 PetscCall(MatDestroySubMatrices(1,&AA)); 1352 */ 1353 PetscCall(ISDestroy(&iscol)); 1354 PetscCall(ISDestroy(&isrow)); 1355 /* 1356 Everyone has to call to draw the matrix since the graphics waits are 1357 synchronized across all processors that share the PetscDraw object 1358 */ 1359 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1360 if (rank == 0) { 1361 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1362 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1363 } 1364 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1365 PetscCall(MatDestroy(&A)); 1366 } 1367 PetscFunctionReturn(PETSC_SUCCESS); 1368 } 1369 1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1371 { 1372 PetscBool iascii, isdraw, issocket, isbinary; 1373 1374 PetscFunctionBegin; 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1377 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1378 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1379 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1384 { 1385 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1386 Vec bb1 = NULL; 1387 PetscBool hasop; 1388 1389 PetscFunctionBegin; 1390 if (flag == SOR_APPLY_UPPER) { 1391 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1396 1397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1398 if (flag & SOR_ZERO_INITIAL_GUESS) { 1399 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1400 its--; 1401 } 1402 1403 while (its--) { 1404 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1405 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1406 1407 /* update rhs: bb1 = bb - B*x */ 1408 PetscCall(VecScale(mat->lvec, -1.0)); 1409 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1410 1411 /* local sweep */ 1412 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1413 } 1414 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1417 its--; 1418 } 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 1439 /* update rhs: bb1 = bb - B*x */ 1440 PetscCall(VecScale(mat->lvec, -1.0)); 1441 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1442 1443 /* local sweep */ 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1445 } 1446 } else if (flag & SOR_EISENSTAT) { 1447 Vec xx1; 1448 1449 PetscCall(VecDuplicate(bb, &xx1)); 1450 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1451 1452 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 if (!mat->diag) { 1455 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1456 PetscCall(MatGetDiagonal(matin, mat->diag)); 1457 } 1458 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1459 if (hasop) { 1460 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1461 } else { 1462 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1463 } 1464 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1465 1466 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1467 1468 /* local sweep */ 1469 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1470 PetscCall(VecAXPY(xx, 1.0, xx1)); 1471 PetscCall(VecDestroy(&xx1)); 1472 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1473 1474 PetscCall(VecDestroy(&bb1)); 1475 1476 matin->factorerrortype = mat->A->factorerrortype; 1477 PetscFunctionReturn(PETSC_SUCCESS); 1478 } 1479 1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1481 { 1482 Mat aA, aB, Aperm; 1483 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1484 PetscScalar *aa, *ba; 1485 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1486 PetscSF rowsf, sf; 1487 IS parcolp = NULL; 1488 PetscBool done; 1489 1490 PetscFunctionBegin; 1491 PetscCall(MatGetLocalSize(A, &m, &n)); 1492 PetscCall(ISGetIndices(rowp, &rwant)); 1493 PetscCall(ISGetIndices(colp, &cwant)); 1494 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1495 1496 /* Invert row permutation to find out where my rows should go */ 1497 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1498 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1499 PetscCall(PetscSFSetFromOptions(rowsf)); 1500 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1501 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1502 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1503 1504 /* Invert column permutation to find out where my columns should go */ 1505 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1506 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1507 PetscCall(PetscSFSetFromOptions(sf)); 1508 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1509 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1510 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1511 PetscCall(PetscSFDestroy(&sf)); 1512 1513 PetscCall(ISRestoreIndices(rowp, &rwant)); 1514 PetscCall(ISRestoreIndices(colp, &cwant)); 1515 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1516 1517 /* Find out where my gcols should go */ 1518 PetscCall(MatGetSize(aB, NULL, &ng)); 1519 PetscCall(PetscMalloc1(ng, &gcdest)); 1520 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1521 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1522 PetscCall(PetscSFSetFromOptions(sf)); 1523 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1524 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1525 PetscCall(PetscSFDestroy(&sf)); 1526 1527 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1528 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1529 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1530 for (i = 0; i < m; i++) { 1531 PetscInt row = rdest[i]; 1532 PetscMPIInt rowner; 1533 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1534 for (j = ai[i]; j < ai[i + 1]; j++) { 1535 PetscInt col = cdest[aj[j]]; 1536 PetscMPIInt cowner; 1537 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1538 if (rowner == cowner) dnnz[i]++; 1539 else onnz[i]++; 1540 } 1541 for (j = bi[i]; j < bi[i + 1]; j++) { 1542 PetscInt col = gcdest[bj[j]]; 1543 PetscMPIInt cowner; 1544 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1545 if (rowner == cowner) dnnz[i]++; 1546 else onnz[i]++; 1547 } 1548 } 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1551 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1552 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1553 PetscCall(PetscSFDestroy(&rowsf)); 1554 1555 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1556 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1557 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1558 for (i = 0; i < m; i++) { 1559 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1560 PetscInt j0, rowlen; 1561 rowlen = ai[i + 1] - ai[i]; 1562 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1563 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1564 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1565 } 1566 rowlen = bi[i + 1] - bi[i]; 1567 for (j0 = j = 0; j < rowlen; j0 = j) { 1568 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1569 
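/* insert this batch of off-diagonal entries: bcols[] already holds the permuted global column numbers obtained through gcdest[] */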
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1570 } 1571 } 1572 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1573 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1574 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1575 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1576 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1577 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1578 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1579 PetscCall(PetscFree3(work, rdest, cdest)); 1580 PetscCall(PetscFree(gcdest)); 1581 if (parcolp) PetscCall(ISDestroy(&colp)); 1582 *B = Aperm; 1583 PetscFunctionReturn(PETSC_SUCCESS); 1584 } 1585 1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1587 { 1588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1589 1590 PetscFunctionBegin; 1591 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1592 if (ghosts) *ghosts = aij->garray; 1593 PetscFunctionReturn(PETSC_SUCCESS); 1594 } 1595 1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1597 { 1598 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1599 Mat A = mat->A, B = mat->B; 1600 PetscLogDouble isend[5], irecv[5]; 1601 1602 PetscFunctionBegin; 1603 info->block_size = 1.0; 1604 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1605 1606 isend[0] = info->nz_used; 1607 isend[1] = info->nz_allocated; 1608 isend[2] = info->nz_unneeded; 1609 isend[3] = info->memory; 1610 isend[4] = info->mallocs; 1611 1612 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1613 1614 isend[0] += info->nz_used; 1615 isend[1] += info->nz_allocated; 1616 isend[2] += info->nz_unneeded; 1617 isend[3] += info->memory; 1618 isend[4] += info->mallocs; 1619 if (flag == MAT_LOCAL) { 1620 info->nz_used = isend[0]; 1621 info->nz_allocated = isend[1]; 1622 info->nz_unneeded = isend[2]; 1623 info->memory = isend[3]; 1624 info->mallocs = isend[4]; 1625 } else if (flag == MAT_GLOBAL_MAX) { 1626 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1627 1628 info->nz_used = irecv[0]; 1629 info->nz_allocated = irecv[1]; 1630 info->nz_unneeded = irecv[2]; 1631 info->memory = irecv[3]; 1632 info->mallocs = irecv[4]; 1633 } else if (flag == MAT_GLOBAL_SUM) { 1634 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } 1642 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1643 info->fill_ratio_needed = 0; 1644 info->factor_mallocs = 0; 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1651 1652 PetscFunctionBegin; 1653 switch (op) { 1654 case MAT_NEW_NONZERO_LOCATIONS: 1655 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1656 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1657 case MAT_KEEP_NONZERO_PATTERN: 1658 case MAT_NEW_NONZERO_LOCATION_ERR: 1659 case MAT_USE_INODES: 1660 case MAT_IGNORE_ZERO_ENTRIES: 1661 case MAT_FORM_EXPLICIT_TRANSPOSE: 1662 MatCheckPreallocated(A, 1); 1663 PetscCall(MatSetOption(a->A, op, flg)); 1664 PetscCall(MatSetOption(a->B, op, flg)); 1665 break; 1666 case MAT_ROW_ORIENTED: 1667 MatCheckPreallocated(A, 1); 1668 a->roworiented = flg; 1669 1670 
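/* propagate the row-orientation flag to both sequential blocks so that MatSetValues() interprets value arrays consistently on the diagonal and off-diagonal parts */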
PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_FORCE_DIAGONAL_ENTRIES: 1674 case MAT_SORTED_FULL: 1675 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1676 break; 1677 case MAT_IGNORE_OFF_PROC_ENTRIES: 1678 a->donotstash = flg; 1679 break; 1680 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1681 case MAT_SPD: 1682 case MAT_SYMMETRIC: 1683 case MAT_STRUCTURALLY_SYMMETRIC: 1684 case MAT_HERMITIAN: 1685 case MAT_SYMMETRY_ETERNAL: 1686 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1687 case MAT_SPD_ETERNAL: 1688 /* if the diagonal matrix is square it inherits some of the properties above */ 1689 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1690 break; 1691 case MAT_SUBMAT_SINGLEIS: 1692 A->submat_singleis = flg; 1693 break; 1694 case MAT_STRUCTURE_ONLY: 1695 /* The option is handled directly by MatSetOption() */ 1696 break; 1697 default: 1698 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1699 } 1700 PetscFunctionReturn(PETSC_SUCCESS); 1701 } 1702 1703 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1704 { 1705 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1706 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1707 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1708 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1709 PetscInt *cmap, *idx_p; 1710 1711 PetscFunctionBegin; 1712 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1713 mat->getrowactive = PETSC_TRUE; 1714 1715 if (!mat->rowvalues && (idx || v)) { 1716 /* 1717 allocate enough space to hold information from the longest row. 
1718 */ 1719 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1720 PetscInt max = 1, tmp; 1721 for (i = 0; i < matin->rmap->n; i++) { 1722 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1723 if (max < tmp) max = tmp; 1724 } 1725 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1726 } 1727 1728 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1729 lrow = row - rstart; 1730 1731 pvA = &vworkA; 1732 pcA = &cworkA; 1733 pvB = &vworkB; 1734 pcB = &cworkB; 1735 if (!v) { 1736 pvA = NULL; 1737 pvB = NULL; 1738 } 1739 if (!idx) { 1740 pcA = NULL; 1741 if (!v) pcB = NULL; 1742 } 1743 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1744 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1745 nztot = nzA + nzB; 1746 1747 cmap = mat->garray; 1748 if (v || idx) { 1749 if (nztot) { 1750 /* Sort by increasing column numbers, assuming A and B already sorted */ 1751 PetscInt imark = -1; 1752 if (v) { 1753 *v = v_p = mat->rowvalues; 1754 for (i = 0; i < nzB; i++) { 1755 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1756 else break; 1757 } 1758 imark = i; 1759 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1760 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1761 } 1762 if (idx) { 1763 *idx = idx_p = mat->rowindices; 1764 if (imark > -1) { 1765 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1766 } else { 1767 for (i = 0; i < nzB; i++) { 1768 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1769 else break; 1770 } 1771 imark = i; 1772 } 1773 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1774 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1775 } 1776 } else { 1777 if (idx) *idx = NULL; 1778 if (v) *v = NULL; 1779 } 1780 } 1781 *nz = nztot; 1782 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1783 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1784 PetscFunctionReturn(PETSC_SUCCESS); 1785 } 1786 1787 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1788 { 1789 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1790 1791 PetscFunctionBegin; 1792 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1793 aij->getrowactive = PETSC_FALSE; 1794 PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1801 PetscInt i, j, cstart = mat->cmap->rstart; 1802 PetscReal sum = 0.0; 1803 const MatScalar *v, *amata, *bmata; 1804 PetscMPIInt iN; 1805 1806 PetscFunctionBegin; 1807 if (aij->size == 1) { 1808 PetscCall(MatNorm(aij->A, type, norm)); 1809 } else { 1810 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1811 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1812 if (type == NORM_FROBENIUS) { 1813 v = amata; 1814 for (i = 0; i < amat->nz; i++) { 1815 sum += PetscRealPart(PetscConj(*v) * (*v)); 1816 v++; 1817 } 1818 v = bmata; 1819 for (i = 0; i < bmat->nz; i++) { 1820 sum += PetscRealPart(PetscConj(*v) * (*v)); 1821 v++; 1822 } 1823 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1824 *norm = PetscSqrtReal(*norm); 1825 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1826 } else if 
(type == NORM_1) { /* max column norm */ 1827 PetscReal *tmp, *tmp2; 1828 PetscInt *jj, *garray = aij->garray; 1829 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1830 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1831 *norm = 0.0; 1832 v = amata; 1833 jj = amat->j; 1834 for (j = 0; j < amat->nz; j++) { 1835 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1836 v++; 1837 } 1838 v = bmata; 1839 jj = bmat->j; 1840 for (j = 0; j < bmat->nz; j++) { 1841 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1842 v++; 1843 } 1844 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 1845 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1846 for (j = 0; j < mat->cmap->N; j++) { 1847 if (tmp2[j] > *norm) *norm = tmp2[j]; 1848 } 1849 PetscCall(PetscFree(tmp)); 1850 PetscCall(PetscFree(tmp2)); 1851 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1852 } else if (type == NORM_INFINITY) { /* max row norm */ 1853 PetscReal ntemp = 0.0; 1854 for (j = 0; j < aij->A->rmap->n; j++) { 1855 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1856 sum = 0.0; 1857 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1858 sum += PetscAbsScalar(*v); 1859 v++; 1860 } 1861 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1862 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1863 sum += PetscAbsScalar(*v); 1864 v++; 1865 } 1866 if (sum > ntemp) ntemp = sum; 1867 } 1868 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1869 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1870 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1871 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1872 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1873 } 1874 PetscFunctionReturn(PETSC_SUCCESS); 1875 } 1876 1877 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1878 { 1879 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1880 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1881 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1882 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1883 Mat B, A_diag, *B_diag; 1884 const MatScalar *pbv, *bv; 1885 1886 PetscFunctionBegin; 1887 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1888 ma = A->rmap->n; 1889 na = A->cmap->n; 1890 mb = a->B->rmap->n; 1891 nb = a->B->cmap->n; 1892 ai = Aloc->i; 1893 aj = Aloc->j; 1894 bi = Bloc->i; 1895 bj = Bloc->j; 1896 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1897 PetscInt *d_nnz, *g_nnz, *o_nnz; 1898 PetscSFNode *oloc; 1899 PETSC_UNUSED PetscSF sf; 1900 1901 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1902 /* compute d_nnz for preallocation */ 1903 PetscCall(PetscArrayzero(d_nnz, na)); 1904 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1905 /* compute local off-diagonal contributions */ 1906 PetscCall(PetscArrayzero(g_nnz, nb)); 1907 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1908 /* map those to global */ 1909 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1910 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1911 PetscCall(PetscSFSetFromOptions(sf)); 1912 PetscCall(PetscArrayzero(o_nnz, na)); 1913 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1914 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, 
MPI_SUM)); 1915 PetscCall(PetscSFDestroy(&sf)); 1916 1917 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1918 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1919 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1920 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1921 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1922 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1923 } else { 1924 B = *matout; 1925 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1926 } 1927 1928 b = (Mat_MPIAIJ *)B->data; 1929 A_diag = a->A; 1930 B_diag = &b->A; 1931 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1932 A_diag_ncol = A_diag->cmap->N; 1933 B_diag_ilen = sub_B_diag->ilen; 1934 B_diag_i = sub_B_diag->i; 1935 1936 /* Set ilen for diagonal of B */ 1937 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1938 1939 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1940 very quickly (=without using MatSetValues), because all writes are local. */ 1941 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1942 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1943 1944 /* copy over the B part */ 1945 PetscCall(PetscMalloc1(bi[mb], &cols)); 1946 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1947 pbv = bv; 1948 row = A->rmap->rstart; 1949 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1950 cols_tmp = cols; 1951 for (i = 0; i < mb; i++) { 1952 ncol = bi[i + 1] - bi[i]; 1953 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1954 row++; 1955 if (pbv) pbv += ncol; 1956 if (cols_tmp) cols_tmp += ncol; 1957 } 1958 PetscCall(PetscFree(cols)); 1959 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1960 1961 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1962 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1963 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1964 *matout = B; 1965 } else { 1966 PetscCall(MatHeaderMerge(A, &B)); 1967 } 1968 PetscFunctionReturn(PETSC_SUCCESS); 1969 } 1970 1971 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1972 { 1973 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1974 Mat a = aij->A, b = aij->B; 1975 PetscInt s1, s2, s3; 1976 1977 PetscFunctionBegin; 1978 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1979 if (rr) { 1980 PetscCall(VecGetLocalSize(rr, &s1)); 1981 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1982 /* Overlap communication with computation. 
*/ 1983 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1984 } 1985 if (ll) { 1986 PetscCall(VecGetLocalSize(ll, &s1)); 1987 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1988 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1989 } 1990 /* scale the diagonal block */ 1991 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1992 1993 if (rr) { 1994 /* Do a scatter end and then right scale the off-diagonal block */ 1995 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1996 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1997 } 1998 PetscFunctionReturn(PETSC_SUCCESS); 1999 } 2000 2001 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2002 { 2003 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2004 2005 PetscFunctionBegin; 2006 PetscCall(MatSetUnfactored(a->A)); 2007 PetscFunctionReturn(PETSC_SUCCESS); 2008 } 2009 2010 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2011 { 2012 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2013 Mat a, b, c, d; 2014 PetscBool flg; 2015 2016 PetscFunctionBegin; 2017 a = matA->A; 2018 b = matA->B; 2019 c = matB->A; 2020 d = matB->B; 2021 2022 PetscCall(MatEqual(a, c, &flg)); 2023 if (flg) PetscCall(MatEqual(b, d, &flg)); 2024 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2025 PetscFunctionReturn(PETSC_SUCCESS); 2026 } 2027 2028 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2029 { 2030 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2031 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2032 2033 PetscFunctionBegin; 2034 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2035 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2036 /* because of the column compression in the off-processor part of the matrix a->B, 2037 the number of columns in a->B and b->B may be different, hence we cannot call 2038 the MatCopy() directly on the two parts. If need be, we can provide a more 2039 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2040 then copying the submatrices */ 2041 PetscCall(MatCopy_Basic(A, B, str)); 2042 } else { 2043 PetscCall(MatCopy(a->A, b->A, str)); 2044 PetscCall(MatCopy(a->B, b->B, str)); 2045 } 2046 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2047 PetscFunctionReturn(PETSC_SUCCESS); 2048 } 2049 2050 /* 2051 Computes the number of nonzeros per row needed for preallocation when X and Y 2052 have different nonzero structure. 
2053 */ 2054 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2055 { 2056 PetscInt i, j, k, nzx, nzy; 2057 2058 PetscFunctionBegin; 2059 /* Set the number of nonzeros in the new matrix */ 2060 for (i = 0; i < m; i++) { 2061 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2062 nzx = xi[i + 1] - xi[i]; 2063 nzy = yi[i + 1] - yi[i]; 2064 nnz[i] = 0; 2065 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2066 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2067 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2068 nnz[i]++; 2069 } 2070 for (; k < nzy; k++) nnz[i]++; 2071 } 2072 PetscFunctionReturn(PETSC_SUCCESS); 2073 } 2074 2075 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2076 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2077 { 2078 PetscInt m = Y->rmap->N; 2079 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2080 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2081 2082 PetscFunctionBegin; 2083 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2084 PetscFunctionReturn(PETSC_SUCCESS); 2085 } 2086 2087 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2088 { 2089 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2090 2091 PetscFunctionBegin; 2092 if (str == SAME_NONZERO_PATTERN) { 2093 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2094 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2095 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2096 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2097 } else { 2098 Mat B; 2099 PetscInt *nnz_d, *nnz_o; 2100 2101 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2102 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2103 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2104 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2105 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2106 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2107 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2108 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2109 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2110 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2111 PetscCall(MatHeaderMerge(Y, &B)); 2112 PetscCall(PetscFree(nnz_d)); 2113 PetscCall(PetscFree(nnz_o)); 2114 } 2115 PetscFunctionReturn(PETSC_SUCCESS); 2116 } 2117 2118 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2119 2120 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2121 { 2122 PetscFunctionBegin; 2123 if (PetscDefined(USE_COMPLEX)) { 2124 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2125 2126 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2127 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2128 } 2129 PetscFunctionReturn(PETSC_SUCCESS); 2130 } 2131 2132 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2133 { 2134 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2135 2136 PetscFunctionBegin; 2137 PetscCall(MatRealPart(a->A)); 2138 PetscCall(MatRealPart(a->B)); 2139 PetscFunctionReturn(PETSC_SUCCESS); 2140 } 2141 2142 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2143 { 2144 Mat_MPIAIJ *a = (Mat_MPIAIJ 
*)A->data; 2145 2146 PetscFunctionBegin; 2147 PetscCall(MatImaginaryPart(a->A)); 2148 PetscCall(MatImaginaryPart(a->B)); 2149 PetscFunctionReturn(PETSC_SUCCESS); 2150 } 2151 2152 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2153 { 2154 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2155 PetscInt i, *idxb = NULL, m = A->rmap->n; 2156 PetscScalar *vv; 2157 Vec vB, vA; 2158 const PetscScalar *va, *vb; 2159 2160 PetscFunctionBegin; 2161 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2162 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2163 2164 PetscCall(VecGetArrayRead(vA, &va)); 2165 if (idx) { 2166 for (i = 0; i < m; i++) { 2167 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2168 } 2169 } 2170 2171 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2172 PetscCall(PetscMalloc1(m, &idxb)); 2173 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2174 2175 PetscCall(VecGetArrayWrite(v, &vv)); 2176 PetscCall(VecGetArrayRead(vB, &vb)); 2177 for (i = 0; i < m; i++) { 2178 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2179 vv[i] = vb[i]; 2180 if (idx) idx[i] = a->garray[idxb[i]]; 2181 } else { 2182 vv[i] = va[i]; 2183 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2184 } 2185 } 2186 PetscCall(VecRestoreArrayWrite(v, &vv)); 2187 PetscCall(VecRestoreArrayRead(vA, &va)); 2188 PetscCall(VecRestoreArrayRead(vB, &vb)); 2189 PetscCall(PetscFree(idxb)); 2190 PetscCall(VecDestroy(&vA)); 2191 PetscCall(VecDestroy(&vB)); 2192 PetscFunctionReturn(PETSC_SUCCESS); 2193 } 2194 2195 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2196 { 2197 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2198 Vec vB, vA; 2199 2200 PetscFunctionBegin; 2201 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2202 PetscCall(MatGetRowSumAbs(a->A, vA)); 2203 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2204 PetscCall(MatGetRowSumAbs(a->B, vB)); 2205 PetscCall(VecAXPY(vA, 1.0, vB)); 2206 PetscCall(VecDestroy(&vB)); 2207 PetscCall(VecCopy(vA, v)); 2208 PetscCall(VecDestroy(&vA)); 2209 PetscFunctionReturn(PETSC_SUCCESS); 2210 } 2211 2212 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2213 { 2214 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2215 PetscInt m = A->rmap->n, n = A->cmap->n; 2216 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2217 PetscInt *cmap = mat->garray; 2218 PetscInt *diagIdx, *offdiagIdx; 2219 Vec diagV, offdiagV; 2220 PetscScalar *a, *diagA, *offdiagA; 2221 const PetscScalar *ba, *bav; 2222 PetscInt r, j, col, ncols, *bi, *bj; 2223 Mat B = mat->B; 2224 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2225 2226 PetscFunctionBegin; 2227 /* When a process holds entire A and other processes have no entry */ 2228 if (A->cmap->N == n) { 2229 PetscCall(VecGetArrayWrite(v, &diagA)); 2230 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2231 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2232 PetscCall(VecDestroy(&diagV)); 2233 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2234 PetscFunctionReturn(PETSC_SUCCESS); 2235 } else if (n == 0) { 2236 if (m) { 2237 PetscCall(VecGetArrayWrite(v, &a)); 2238 for (r = 0; r < m; r++) { 2239 a[r] = 0.0; 2240 if (idx) idx[r] = -1; 2241 } 2242 PetscCall(VecRestoreArrayWrite(v, &a)); 2243 } 2244 PetscFunctionReturn(PETSC_SUCCESS); 2245 } 2246 2247 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2249 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2250 PetscCall(MatGetRowMinAbs(mat->A, 
diagV, diagIdx)); 2251 2252 /* Get offdiagIdx[] for implicit 0.0 */ 2253 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2254 ba = bav; 2255 bi = b->i; 2256 bj = b->j; 2257 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2258 for (r = 0; r < m; r++) { 2259 ncols = bi[r + 1] - bi[r]; 2260 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2261 offdiagA[r] = *ba; 2262 offdiagIdx[r] = cmap[0]; 2263 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2264 offdiagA[r] = 0.0; 2265 2266 /* Find first hole in the cmap */ 2267 for (j = 0; j < ncols; j++) { 2268 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2269 if (col > j && j < cstart) { 2270 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2271 break; 2272 } else if (col > j + n && j >= cstart) { 2273 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2274 break; 2275 } 2276 } 2277 if (j == ncols && ncols < A->cmap->N - n) { 2278 /* a hole is outside compressed Bcols */ 2279 if (ncols == 0) { 2280 if (cstart) { 2281 offdiagIdx[r] = 0; 2282 } else offdiagIdx[r] = cend; 2283 } else { /* ncols > 0 */ 2284 offdiagIdx[r] = cmap[ncols - 1] + 1; 2285 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2286 } 2287 } 2288 } 2289 2290 for (j = 0; j < ncols; j++) { 2291 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2292 offdiagA[r] = *ba; 2293 offdiagIdx[r] = cmap[*bj]; 2294 } 2295 ba++; 2296 bj++; 2297 } 2298 } 2299 2300 PetscCall(VecGetArrayWrite(v, &a)); 2301 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2302 for (r = 0; r < m; ++r) { 2303 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2304 a[r] = diagA[r]; 2305 if (idx) idx[r] = cstart + diagIdx[r]; 2306 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2307 a[r] = diagA[r]; 2308 if (idx) { 2309 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2310 idx[r] = cstart + diagIdx[r]; 2311 } else idx[r] = offdiagIdx[r]; 2312 } 2313 } else { 2314 a[r] = offdiagA[r]; 2315 if (idx) idx[r] = offdiagIdx[r]; 2316 } 2317 } 2318 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2319 PetscCall(VecRestoreArrayWrite(v, &a)); 2320 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2321 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2322 PetscCall(VecDestroy(&diagV)); 2323 PetscCall(VecDestroy(&offdiagV)); 2324 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2325 PetscFunctionReturn(PETSC_SUCCESS); 2326 } 2327 2328 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2329 { 2330 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2331 PetscInt m = A->rmap->n, n = A->cmap->n; 2332 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2333 PetscInt *cmap = mat->garray; 2334 PetscInt *diagIdx, *offdiagIdx; 2335 Vec diagV, offdiagV; 2336 PetscScalar *a, *diagA, *offdiagA; 2337 const PetscScalar *ba, *bav; 2338 PetscInt r, j, col, ncols, *bi, *bj; 2339 Mat B = mat->B; 2340 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2341 2342 PetscFunctionBegin; 2343 /* When a process holds entire A and other processes have no entry */ 2344 if (A->cmap->N == n) { 2345 PetscCall(VecGetArrayWrite(v, &diagA)); 2346 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2347 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2348 PetscCall(VecDestroy(&diagV)); 2349 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2350 PetscFunctionReturn(PETSC_SUCCESS); 2351 } else if (n == 0) { 2352 if (m) { 2353 PetscCall(VecGetArrayWrite(v, &a)); 2354 for (r = 0; r < m; r++) { 2355 a[r] = 
PETSC_MAX_REAL; 2356 if (idx) idx[r] = -1; 2357 } 2358 PetscCall(VecRestoreArrayWrite(v, &a)); 2359 } 2360 PetscFunctionReturn(PETSC_SUCCESS); 2361 } 2362 2363 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2364 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2365 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2366 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2367 2368 /* Get offdiagIdx[] for implicit 0.0 */ 2369 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2370 ba = bav; 2371 bi = b->i; 2372 bj = b->j; 2373 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2374 for (r = 0; r < m; r++) { 2375 ncols = bi[r + 1] - bi[r]; 2376 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2377 offdiagA[r] = *ba; 2378 offdiagIdx[r] = cmap[0]; 2379 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2380 offdiagA[r] = 0.0; 2381 2382 /* Find first hole in the cmap */ 2383 for (j = 0; j < ncols; j++) { 2384 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2385 if (col > j && j < cstart) { 2386 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2387 break; 2388 } else if (col > j + n && j >= cstart) { 2389 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2390 break; 2391 } 2392 } 2393 if (j == ncols && ncols < A->cmap->N - n) { 2394 /* a hole is outside compressed Bcols */ 2395 if (ncols == 0) { 2396 if (cstart) { 2397 offdiagIdx[r] = 0; 2398 } else offdiagIdx[r] = cend; 2399 } else { /* ncols > 0 */ 2400 offdiagIdx[r] = cmap[ncols - 1] + 1; 2401 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2402 } 2403 } 2404 } 2405 2406 for (j = 0; j < ncols; j++) { 2407 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2408 offdiagA[r] = *ba; 2409 offdiagIdx[r] = cmap[*bj]; 2410 } 2411 ba++; 2412 bj++; 2413 } 2414 } 2415 2416 PetscCall(VecGetArrayWrite(v, &a)); 2417 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2418 for (r = 0; r < m; ++r) { 2419 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2420 a[r] = diagA[r]; 2421 if (idx) idx[r] = cstart + diagIdx[r]; 2422 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2423 a[r] = diagA[r]; 2424 if (idx) { 2425 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2426 idx[r] = cstart + diagIdx[r]; 2427 } else idx[r] = offdiagIdx[r]; 2428 } 2429 } else { 2430 a[r] = offdiagA[r]; 2431 if (idx) idx[r] = offdiagIdx[r]; 2432 } 2433 } 2434 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2435 PetscCall(VecRestoreArrayWrite(v, &a)); 2436 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2437 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2438 PetscCall(VecDestroy(&diagV)); 2439 PetscCall(VecDestroy(&offdiagV)); 2440 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2441 PetscFunctionReturn(PETSC_SUCCESS); 2442 } 2443 2444 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2445 { 2446 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2447 PetscInt m = A->rmap->n, n = A->cmap->n; 2448 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2449 PetscInt *cmap = mat->garray; 2450 PetscInt *diagIdx, *offdiagIdx; 2451 Vec diagV, offdiagV; 2452 PetscScalar *a, *diagA, *offdiagA; 2453 const PetscScalar *ba, *bav; 2454 PetscInt r, j, col, ncols, *bi, *bj; 2455 Mat B = mat->B; 2456 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2457 2458 PetscFunctionBegin; 2459 /* When a process holds entire A and other processes have no entry */ 2460 if (A->cmap->N == n) { 2461 PetscCall(VecGetArrayWrite(v, &diagA)); 2462 
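/* wrap the caller's output array in a sequential Vec so the diagonal block can compute the row maxima directly into it */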
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2463 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2464 PetscCall(VecDestroy(&diagV)); 2465 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2466 PetscFunctionReturn(PETSC_SUCCESS); 2467 } else if (n == 0) { 2468 if (m) { 2469 PetscCall(VecGetArrayWrite(v, &a)); 2470 for (r = 0; r < m; r++) { 2471 a[r] = PETSC_MIN_REAL; 2472 if (idx) idx[r] = -1; 2473 } 2474 PetscCall(VecRestoreArrayWrite(v, &a)); 2475 } 2476 PetscFunctionReturn(PETSC_SUCCESS); 2477 } 2478 2479 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2480 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2481 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2482 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2483 2484 /* Get offdiagIdx[] for implicit 0.0 */ 2485 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2486 ba = bav; 2487 bi = b->i; 2488 bj = b->j; 2489 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2490 for (r = 0; r < m; r++) { 2491 ncols = bi[r + 1] - bi[r]; 2492 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2493 offdiagA[r] = *ba; 2494 offdiagIdx[r] = cmap[0]; 2495 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2496 offdiagA[r] = 0.0; 2497 2498 /* Find first hole in the cmap */ 2499 for (j = 0; j < ncols; j++) { 2500 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2501 if (col > j && j < cstart) { 2502 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2503 break; 2504 } else if (col > j + n && j >= cstart) { 2505 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2506 break; 2507 } 2508 } 2509 if (j == ncols && ncols < A->cmap->N - n) { 2510 /* a hole is outside compressed Bcols */ 2511 if (ncols == 0) { 2512 if (cstart) { 2513 offdiagIdx[r] = 0; 2514 } else offdiagIdx[r] = cend; 2515 } else { /* ncols > 0 */ 2516 offdiagIdx[r] = cmap[ncols - 1] + 1; 2517 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2518 } 2519 } 2520 } 2521 2522 for (j = 0; j < ncols; j++) { 2523 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2524 offdiagA[r] = *ba; 2525 offdiagIdx[r] = cmap[*bj]; 2526 } 2527 ba++; 2528 bj++; 2529 } 2530 } 2531 2532 PetscCall(VecGetArrayWrite(v, &a)); 2533 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2534 for (r = 0; r < m; ++r) { 2535 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2536 a[r] = diagA[r]; 2537 if (idx) idx[r] = cstart + diagIdx[r]; 2538 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2539 a[r] = diagA[r]; 2540 if (idx) { 2541 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2542 idx[r] = cstart + diagIdx[r]; 2543 } else idx[r] = offdiagIdx[r]; 2544 } 2545 } else { 2546 a[r] = offdiagA[r]; 2547 if (idx) idx[r] = offdiagIdx[r]; 2548 } 2549 } 2550 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2551 PetscCall(VecRestoreArrayWrite(v, &a)); 2552 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2553 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2554 PetscCall(VecDestroy(&diagV)); 2555 PetscCall(VecDestroy(&offdiagV)); 2556 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2557 PetscFunctionReturn(PETSC_SUCCESS); 2558 } 2559 2560 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2561 { 2562 Mat *dummy; 2563 2564 PetscFunctionBegin; 2565 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2566 *newmat = *dummy; 2567 PetscCall(PetscFree(dummy)); 2568 PetscFunctionReturn(PETSC_SUCCESS); 2569 } 2570 
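/*
   The MatGetRowMax/Min/MaxAbs/MinAbs implementations above all follow the same pattern: reduce each
   local row over the diagonal block A and the off-diagonal block B, translating B's compressed column
   indices to global numbering through garray[] and, where needed, accounting for the implicit zeros
   that column compression hides. A caller-side sketch (illustrative only; `A` and the other names
   below are placeholders, not part of this file):

     Vec      rowmax;
     PetscInt mlocal, *col;
     PetscCall(MatGetLocalSize(A, &mlocal, NULL));
     PetscCall(MatCreateVecs(A, NULL, &rowmax));   // left vector: one entry per local row
     PetscCall(PetscMalloc1(mlocal, &col));        // receives the global column of each row's maximum
     PetscCall(MatGetRowMax(A, rowmax, col));
     PetscCall(PetscFree(col));
     PetscCall(VecDestroy(&rowmax));
*/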
2571 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2572 { 2573 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2574 2575 PetscFunctionBegin; 2576 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2577 A->factorerrortype = a->A->factorerrortype; 2578 PetscFunctionReturn(PETSC_SUCCESS); 2579 } 2580 2581 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2582 { 2583 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2584 2585 PetscFunctionBegin; 2586 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2587 PetscCall(MatSetRandom(aij->A, rctx)); 2588 if (x->assembled) { 2589 PetscCall(MatSetRandom(aij->B, rctx)); 2590 } else { 2591 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2592 } 2593 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2594 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2595 PetscFunctionReturn(PETSC_SUCCESS); 2596 } 2597 2598 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2599 { 2600 PetscFunctionBegin; 2601 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2602 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2603 PetscFunctionReturn(PETSC_SUCCESS); 2604 } 2605 2606 /*@ 2607 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2608 2609 Not Collective 2610 2611 Input Parameter: 2612 . A - the matrix 2613 2614 Output Parameter: 2615 . nz - the number of nonzeros 2616 2617 Level: advanced 2618 2619 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2620 @*/ 2621 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2622 { 2623 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2624 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2625 PetscBool isaij; 2626 2627 PetscFunctionBegin; 2628 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2629 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2630 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2631 PetscFunctionReturn(PETSC_SUCCESS); 2632 } 2633 2634 /*@ 2635 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2636 2637 Collective 2638 2639 Input Parameters: 2640 + A - the matrix 2641 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2642 2643 Level: advanced 2644 2645 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2646 @*/ 2647 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2648 { 2649 PetscFunctionBegin; 2650 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2651 PetscFunctionReturn(PETSC_SUCCESS); 2652 } 2653 2654 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2655 { 2656 PetscBool sc = PETSC_FALSE, flg; 2657 2658 PetscFunctionBegin; 2659 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2660 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2661 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2662 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2663 PetscOptionsHeadEnd(); 2664 
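  /*
     Usage sketch (comment only; `A` is a placeholder MATMPIAIJ matrix): the scalable overlap
     algorithm documented above can be selected programmatically or from the options database,

       PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));   // or run with -mat_increase_overlap_scalable

     and the local nonzero count can be queried with

       PetscCount nz;
       PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  */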
PetscFunctionReturn(PETSC_SUCCESS); 2665 } 2666 2667 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2668 { 2669 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2670 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2671 2672 PetscFunctionBegin; 2673 if (!Y->preallocated) { 2674 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2675 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2676 PetscInt nonew = aij->nonew; 2677 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2678 aij->nonew = nonew; 2679 } 2680 PetscCall(MatShift_Basic(Y, a)); 2681 PetscFunctionReturn(PETSC_SUCCESS); 2682 } 2683 2684 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2685 { 2686 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2687 2688 PetscFunctionBegin; 2689 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2690 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2691 if (d) { 2692 PetscInt rstart; 2693 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2694 *d += rstart; 2695 } 2696 PetscFunctionReturn(PETSC_SUCCESS); 2697 } 2698 2699 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2700 { 2701 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2702 2703 PetscFunctionBegin; 2704 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2705 PetscFunctionReturn(PETSC_SUCCESS); 2706 } 2707 2708 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2709 { 2710 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2711 2712 PetscFunctionBegin; 2713 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2714 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2715 PetscFunctionReturn(PETSC_SUCCESS); 2716 } 2717 2718 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2719 MatGetRow_MPIAIJ, 2720 MatRestoreRow_MPIAIJ, 2721 MatMult_MPIAIJ, 2722 /* 4*/ MatMultAdd_MPIAIJ, 2723 MatMultTranspose_MPIAIJ, 2724 MatMultTransposeAdd_MPIAIJ, 2725 NULL, 2726 NULL, 2727 NULL, 2728 /*10*/ NULL, 2729 NULL, 2730 NULL, 2731 MatSOR_MPIAIJ, 2732 MatTranspose_MPIAIJ, 2733 /*15*/ MatGetInfo_MPIAIJ, 2734 MatEqual_MPIAIJ, 2735 MatGetDiagonal_MPIAIJ, 2736 MatDiagonalScale_MPIAIJ, 2737 MatNorm_MPIAIJ, 2738 /*20*/ MatAssemblyBegin_MPIAIJ, 2739 MatAssemblyEnd_MPIAIJ, 2740 MatSetOption_MPIAIJ, 2741 MatZeroEntries_MPIAIJ, 2742 /*24*/ MatZeroRows_MPIAIJ, 2743 NULL, 2744 NULL, 2745 NULL, 2746 NULL, 2747 /*29*/ MatSetUp_MPI_Hash, 2748 NULL, 2749 NULL, 2750 MatGetDiagonalBlock_MPIAIJ, 2751 NULL, 2752 /*34*/ MatDuplicate_MPIAIJ, 2753 NULL, 2754 NULL, 2755 NULL, 2756 NULL, 2757 /*39*/ MatAXPY_MPIAIJ, 2758 MatCreateSubMatrices_MPIAIJ, 2759 MatIncreaseOverlap_MPIAIJ, 2760 MatGetValues_MPIAIJ, 2761 MatCopy_MPIAIJ, 2762 /*44*/ MatGetRowMax_MPIAIJ, 2763 MatScale_MPIAIJ, 2764 MatShift_MPIAIJ, 2765 MatDiagonalSet_MPIAIJ, 2766 MatZeroRowsColumns_MPIAIJ, 2767 /*49*/ MatSetRandom_MPIAIJ, 2768 MatGetRowIJ_MPIAIJ, 2769 MatRestoreRowIJ_MPIAIJ, 2770 NULL, 2771 NULL, 2772 /*54*/ MatFDColoringCreate_MPIXAIJ, 2773 NULL, 2774 MatSetUnfactored_MPIAIJ, 2775 MatPermute_MPIAIJ, 2776 NULL, 2777 /*59*/ MatCreateSubMatrix_MPIAIJ, 2778 MatDestroy_MPIAIJ, 2779 MatView_MPIAIJ, 2780 NULL, 2781 NULL, 2782 /*64*/ NULL, 2783 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2784 NULL, 2785 NULL, 2786 NULL, 2787 /*69*/ 
MatGetRowMaxAbs_MPIAIJ, 2788 MatGetRowMinAbs_MPIAIJ, 2789 NULL, 2790 NULL, 2791 NULL, 2792 NULL, 2793 /*75*/ MatFDColoringApply_AIJ, 2794 MatSetFromOptions_MPIAIJ, 2795 NULL, 2796 NULL, 2797 MatFindZeroDiagonals_MPIAIJ, 2798 /*80*/ NULL, 2799 NULL, 2800 NULL, 2801 /*83*/ MatLoad_MPIAIJ, 2802 NULL, 2803 NULL, 2804 NULL, 2805 NULL, 2806 NULL, 2807 /*89*/ NULL, 2808 NULL, 2809 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2810 NULL, 2811 NULL, 2812 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2813 NULL, 2814 NULL, 2815 NULL, 2816 MatBindToCPU_MPIAIJ, 2817 /*99*/ MatProductSetFromOptions_MPIAIJ, 2818 NULL, 2819 NULL, 2820 MatConjugate_MPIAIJ, 2821 NULL, 2822 /*104*/ MatSetValuesRow_MPIAIJ, 2823 MatRealPart_MPIAIJ, 2824 MatImaginaryPart_MPIAIJ, 2825 NULL, 2826 NULL, 2827 /*109*/ NULL, 2828 NULL, 2829 MatGetRowMin_MPIAIJ, 2830 NULL, 2831 MatMissingDiagonal_MPIAIJ, 2832 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2833 NULL, 2834 MatGetGhosts_MPIAIJ, 2835 NULL, 2836 NULL, 2837 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2838 NULL, 2839 NULL, 2840 NULL, 2841 MatGetMultiProcBlock_MPIAIJ, 2842 /*124*/ MatFindNonzeroRows_MPIAIJ, 2843 MatGetColumnReductions_MPIAIJ, 2844 MatInvertBlockDiagonal_MPIAIJ, 2845 MatInvertVariableBlockDiagonal_MPIAIJ, 2846 MatCreateSubMatricesMPI_MPIAIJ, 2847 /*129*/ NULL, 2848 NULL, 2849 NULL, 2850 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2851 NULL, 2852 /*134*/ NULL, 2853 NULL, 2854 NULL, 2855 NULL, 2856 NULL, 2857 /*139*/ MatSetBlockSizes_MPIAIJ, 2858 NULL, 2859 NULL, 2860 MatFDColoringSetUp_MPIXAIJ, 2861 MatFindOffBlockDiagonalEntries_MPIAIJ, 2862 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2863 /*145*/ NULL, 2864 NULL, 2865 NULL, 2866 MatCreateGraph_Simple_AIJ, 2867 NULL, 2868 /*150*/ NULL, 2869 MatEliminateZeros_MPIAIJ, 2870 MatGetRowSumAbs_MPIAIJ, 2871 NULL, 2872 NULL, 2873 NULL}; 2874 2875 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2876 { 2877 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2878 2879 PetscFunctionBegin; 2880 PetscCall(MatStoreValues(aij->A)); 2881 PetscCall(MatStoreValues(aij->B)); 2882 PetscFunctionReturn(PETSC_SUCCESS); 2883 } 2884 2885 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2886 { 2887 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2888 2889 PetscFunctionBegin; 2890 PetscCall(MatRetrieveValues(aij->A)); 2891 PetscCall(MatRetrieveValues(aij->B)); 2892 PetscFunctionReturn(PETSC_SUCCESS); 2893 } 2894 2895 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2896 { 2897 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2898 PetscMPIInt size; 2899 2900 PetscFunctionBegin; 2901 if (B->hash_active) { 2902 B->ops[0] = b->cops; 2903 B->hash_active = PETSC_FALSE; 2904 } 2905 PetscCall(PetscLayoutSetUp(B->rmap)); 2906 PetscCall(PetscLayoutSetUp(B->cmap)); 2907 2908 #if defined(PETSC_USE_CTABLE) 2909 PetscCall(PetscHMapIDestroy(&b->colmap)); 2910 #else 2911 PetscCall(PetscFree(b->colmap)); 2912 #endif 2913 PetscCall(PetscFree(b->garray)); 2914 PetscCall(VecDestroy(&b->lvec)); 2915 PetscCall(VecScatterDestroy(&b->Mvctx)); 2916 2917 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2918 2919 MatSeqXAIJGetOptions_Private(b->B); 2920 PetscCall(MatDestroy(&b->B)); 2921 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2922 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2923 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2924 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2925 MatSeqXAIJRestoreOptions_Private(b->B); 2926 2927 MatSeqXAIJGetOptions_Private(b->A); 2928 PetscCall(MatDestroy(&b->A)); 2929 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2930 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2931 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2932 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2933 MatSeqXAIJRestoreOptions_Private(b->A); 2934 2935 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2936 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2937 B->preallocated = PETSC_TRUE; 2938 B->was_assembled = PETSC_FALSE; 2939 B->assembled = PETSC_FALSE; 2940 PetscFunctionReturn(PETSC_SUCCESS); 2941 } 2942 2943 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2944 { 2945 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2946 /* Save the nonzero states of the component matrices because those are what are used to determine 2947 the nonzero state of mat */ 2948 PetscObjectState diagstate = b->A->nonzerostate, offdiagstate = b->B->nonzerostate; 2949 2950 PetscFunctionBegin; 2951 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2952 PetscCall(PetscLayoutSetUp(B->rmap)); 2953 PetscCall(PetscLayoutSetUp(B->cmap)); 2954 if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2955 else { 2956 #if defined(PETSC_USE_CTABLE) 2957 PetscCall(PetscHMapIDestroy(&b->colmap)); 2958 #else 2959 PetscCall(PetscFree(b->colmap)); 2960 #endif 2961 PetscCall(PetscFree(b->garray)); 2962 PetscCall(VecDestroy(&b->lvec)); 2963 } 2964 PetscCall(VecScatterDestroy(&b->Mvctx)); 2965 2966 PetscCall(MatResetPreallocation(b->A)); 2967 PetscCall(MatResetPreallocation(b->B)); 2968 B->preallocated = PETSC_TRUE; 2969 B->was_assembled = PETSC_FALSE; 2970 B->assembled = PETSC_FALSE; 2971 b->A->nonzerostate = ++diagstate, b->B->nonzerostate = ++offdiagstate; 2972 /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */ 2973 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2974 PetscFunctionReturn(PETSC_SUCCESS); 2975 } 2976 2977 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2978 { 2979 Mat mat; 2980 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2981 2982 PetscFunctionBegin; 2983 *newmat = NULL; 2984 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2985 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2986 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2987 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2988 a = (Mat_MPIAIJ *)mat->data; 2989 2990 mat->factortype = matin->factortype; 2991 mat->assembled = matin->assembled; 2992 mat->insertmode = NOT_SET_VALUES; 2993 2994 a->size = oldmat->size; 2995 a->rank = oldmat->rank; 2996 a->donotstash = oldmat->donotstash; 2997 a->roworiented = oldmat->roworiented; 2998 a->rowindices = NULL; 2999 a->rowvalues = NULL; 3000 a->getrowactive = PETSC_FALSE; 3001 3002 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3003 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3004 if (matin->hash_active) { 3005 PetscCall(MatSetUp(mat)); 3006 } else { 3007 mat->preallocated = matin->preallocated; 3008 if (oldmat->colmap) { 3009 #if defined(PETSC_USE_CTABLE) 3010 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3011 #else 3012 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 
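    /* without PETSC_USE_CTABLE the column map is a dense array of global length cmap->N; copy it entry by entry below */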
3013 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3014 #endif 3015 } else a->colmap = NULL; 3016 if (oldmat->garray) { 3017 PetscInt len; 3018 len = oldmat->B->cmap->n; 3019 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3020 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3021 } else a->garray = NULL; 3022 3023 /* It may happen MatDuplicate is called with a non-assembled matrix 3024 In fact, MatDuplicate only requires the matrix to be preallocated 3025 This may happen inside a DMCreateMatrix_Shell */ 3026 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3027 if (oldmat->Mvctx) { 3028 a->Mvctx = oldmat->Mvctx; 3029 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3030 } 3031 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3032 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3033 } 3034 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3035 *newmat = mat; 3036 PetscFunctionReturn(PETSC_SUCCESS); 3037 } 3038 3039 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3040 { 3041 PetscBool isbinary, ishdf5; 3042 3043 PetscFunctionBegin; 3044 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3045 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3046 /* force binary viewer to load .info file if it has not yet done so */ 3047 PetscCall(PetscViewerSetUp(viewer)); 3048 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3049 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3050 if (isbinary) { 3051 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3052 } else if (ishdf5) { 3053 #if defined(PETSC_HAVE_HDF5) 3054 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3055 #else 3056 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3057 #endif 3058 } else { 3059 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3060 } 3061 PetscFunctionReturn(PETSC_SUCCESS); 3062 } 3063 3064 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3065 { 3066 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3067 PetscInt *rowidxs, *colidxs; 3068 PetscScalar *matvals; 3069 3070 PetscFunctionBegin; 3071 PetscCall(PetscViewerSetUp(viewer)); 3072 3073 /* read in matrix header */ 3074 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3075 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3076 M = header[1]; 3077 N = header[2]; 3078 nz = header[3]; 3079 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3080 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3081 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3082 3083 /* set block sizes from the viewer's .info file */ 3084 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3085 /* set global sizes if not set already */ 3086 if (mat->rmap->N < 0) mat->rmap->N = M; 3087 if (mat->cmap->N < 0) mat->cmap->N = N; 3088 PetscCall(PetscLayoutSetUp(mat->rmap)); 
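/* the layouts fix each rank's local row and column ownership, which determines how much of the file this rank reads below */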
3089 PetscCall(PetscLayoutSetUp(mat->cmap)); 3090 3091 /* check if the matrix sizes are correct */ 3092 PetscCall(MatGetSize(mat, &rows, &cols)); 3093 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3094 3095 /* read in row lengths and build row indices */ 3096 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3097 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3098 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3099 rowidxs[0] = 0; 3100 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3101 if (nz != PETSC_INT_MAX) { 3102 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3103 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3104 } 3105 3106 /* read in column indices and matrix values */ 3107 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3108 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3109 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3110 /* store matrix indices and values */ 3111 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3112 PetscCall(PetscFree(rowidxs)); 3113 PetscCall(PetscFree2(colidxs, matvals)); 3114 PetscFunctionReturn(PETSC_SUCCESS); 3115 } 3116 3117 /* Not scalable because of ISAllGather() unless getting all columns. */ 3118 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3119 { 3120 IS iscol_local; 3121 PetscBool isstride; 3122 PetscMPIInt gisstride = 0; 3123 3124 PetscFunctionBegin; 3125 /* check if we are grabbing all columns*/ 3126 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3127 3128 if (isstride) { 3129 PetscInt start, len, mstart, mlen; 3130 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3131 PetscCall(ISGetLocalSize(iscol, &len)); 3132 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3133 if (mstart == start && mlen - mstart == len) gisstride = 1; 3134 } 3135 3136 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3137 if (gisstride) { 3138 PetscInt N; 3139 PetscCall(MatGetSize(mat, NULL, &N)); 3140 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3141 PetscCall(ISSetIdentity(iscol_local)); 3142 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3143 } else { 3144 PetscInt cbs; 3145 PetscCall(ISGetBlockSize(iscol, &cbs)); 3146 PetscCall(ISAllGather(iscol, &iscol_local)); 3147 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3148 } 3149 3150 *isseq = iscol_local; 3151 PetscFunctionReturn(PETSC_SUCCESS); 3152 } 3153 3154 /* 3155 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3156 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3157 3158 Input Parameters: 3159 + mat - matrix 3160 . 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3161 i.e., mat->rstart <= isrow[i] < mat->rend 3162 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3163 i.e., mat->cstart <= iscol[i] < mat->cend 3164 3165 Output Parameters: 3166 + isrow_d - sequential row index set for retrieving mat->A 3167 . iscol_d - sequential column index set for retrieving mat->A 3168 . iscol_o - sequential column index set for retrieving mat->B 3169 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3170 */ 3171 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3172 { 3173 Vec x, cmap; 3174 const PetscInt *is_idx; 3175 PetscScalar *xarray, *cmaparray; 3176 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3178 Mat B = a->B; 3179 Vec lvec = a->lvec, lcmap; 3180 PetscInt i, cstart, cend, Bn = B->cmap->N; 3181 MPI_Comm comm; 3182 VecScatter Mvctx = a->Mvctx; 3183 3184 PetscFunctionBegin; 3185 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3186 PetscCall(ISGetLocalSize(iscol, &ncols)); 3187 3188 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3189 PetscCall(MatCreateVecs(mat, &x, NULL)); 3190 PetscCall(VecSet(x, -1.0)); 3191 PetscCall(VecDuplicate(x, &cmap)); 3192 PetscCall(VecSet(cmap, -1.0)); 3193 3194 /* Get start indices */ 3195 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3196 isstart -= ncols; 3197 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3198 3199 PetscCall(ISGetIndices(iscol, &is_idx)); 3200 PetscCall(VecGetArray(x, &xarray)); 3201 PetscCall(VecGetArray(cmap, &cmaparray)); 3202 PetscCall(PetscMalloc1(ncols, &idx)); 3203 for (i = 0; i < ncols; i++) { 3204 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3205 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3206 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3207 } 3208 PetscCall(VecRestoreArray(x, &xarray)); 3209 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3210 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3211 3212 /* Get iscol_d */ 3213 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3214 PetscCall(ISGetBlockSize(iscol, &i)); 3215 PetscCall(ISSetBlockSize(*iscol_d, i)); 3216 3217 /* Get isrow_d */ 3218 PetscCall(ISGetLocalSize(isrow, &m)); 3219 rstart = mat->rmap->rstart; 3220 PetscCall(PetscMalloc1(m, &idx)); 3221 PetscCall(ISGetIndices(isrow, &is_idx)); 3222 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3223 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3224 3225 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3226 PetscCall(ISGetBlockSize(isrow, &i)); 3227 PetscCall(ISSetBlockSize(*isrow_d, i)); 3228 3229 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3230 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3231 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3232 3233 PetscCall(VecDuplicate(lvec, &lcmap)); 3234 3235 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3236 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3237 3238 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3239 /* off-process 
column indices */ 3240 count = 0; 3241 PetscCall(PetscMalloc1(Bn, &idx)); 3242 PetscCall(PetscMalloc1(Bn, &cmap1)); 3243 3244 PetscCall(VecGetArray(lvec, &xarray)); 3245 PetscCall(VecGetArray(lcmap, &cmaparray)); 3246 for (i = 0; i < Bn; i++) { 3247 if (PetscRealPart(xarray[i]) > -1.0) { 3248 idx[count] = i; /* local column index in off-diagonal part B */ 3249 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3250 count++; 3251 } 3252 } 3253 PetscCall(VecRestoreArray(lvec, &xarray)); 3254 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3255 3256 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3257 /* cannot ensure iscol_o has same blocksize as iscol! */ 3258 3259 PetscCall(PetscFree(idx)); 3260 *garray = cmap1; 3261 3262 PetscCall(VecDestroy(&x)); 3263 PetscCall(VecDestroy(&cmap)); 3264 PetscCall(VecDestroy(&lcmap)); 3265 PetscFunctionReturn(PETSC_SUCCESS); 3266 } 3267 3268 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3269 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3270 { 3271 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3272 Mat M = NULL; 3273 MPI_Comm comm; 3274 IS iscol_d, isrow_d, iscol_o; 3275 Mat Asub = NULL, Bsub = NULL; 3276 PetscInt n; 3277 3278 PetscFunctionBegin; 3279 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3280 3281 if (call == MAT_REUSE_MATRIX) { 3282 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3283 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3284 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3285 3286 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3287 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3288 3289 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3290 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3291 3292 /* Update diagonal and off-diagonal portions of submat */ 3293 asub = (Mat_MPIAIJ *)(*submat)->data; 3294 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3295 PetscCall(ISGetLocalSize(iscol_o, &n)); 3296 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3297 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3298 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3299 3300 } else { /* call == MAT_INITIAL_MATRIX) */ 3301 const PetscInt *garray; 3302 PetscInt BsubN; 3303 3304 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3305 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3306 3307 /* Create local submatrices Asub and Bsub */ 3308 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3309 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3310 3311 /* Create submatrix M */ 3312 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3313 3314 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3315 asub = (Mat_MPIAIJ *)M->data; 3316 3317 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3318 n = asub->B->cmap->N; 3319 if (BsubN > n) { 3320 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3321 const PetscInt *idx; 3322 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3323 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3324 3325 PetscCall(PetscMalloc1(n, &idx_new)); 3326 j = 0; 3327 PetscCall(ISGetIndices(iscol_o, &idx)); 3328 for (i = 0; i < n; i++) { 3329 if (j >= BsubN) break; 3330 while (subgarray[i] > garray[j]) j++; 3331 3332 if (subgarray[i] == garray[j]) { 3333 idx_new[i] = idx[j++]; 3334 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3335 } 3336 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3337 3338 PetscCall(ISDestroy(&iscol_o)); 3339 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3340 3341 } else if (BsubN < n) { 3342 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3343 } 3344 3345 PetscCall(PetscFree(garray)); 3346 *submat = M; 3347 3348 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3349 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3350 PetscCall(ISDestroy(&isrow_d)); 3351 3352 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3353 PetscCall(ISDestroy(&iscol_d)); 3354 3355 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3356 PetscCall(ISDestroy(&iscol_o)); 3357 } 3358 PetscFunctionReturn(PETSC_SUCCESS); 3359 } 3360 3361 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3362 { 3363 IS iscol_local = NULL, isrow_d; 3364 PetscInt csize; 3365 PetscInt n, i, j, start, end; 3366 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3367 MPI_Comm comm; 3368 3369 PetscFunctionBegin; 3370 /* If isrow has same processor distribution as mat, 3371 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3372 if (call == MAT_REUSE_MATRIX) { 3373 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3374 if (isrow_d) { 3375 sameRowDist = PETSC_TRUE; 3376 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3377 } else { 3378 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3379 if (iscol_local) { 3380 sameRowDist = PETSC_TRUE; 3381 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3382 } 3383 } 3384 } else { 3385 /* Check if isrow has same processor distribution as mat */ 3386 sameDist[0] = PETSC_FALSE; 3387 
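/* isrow matches mat's row distribution when all of its local indices fall within this rank's ownership range; an empty local IS matches trivially */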
PetscCall(ISGetLocalSize(isrow, &n)); 3388 if (!n) { 3389 sameDist[0] = PETSC_TRUE; 3390 } else { 3391 PetscCall(ISGetMinMax(isrow, &i, &j)); 3392 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3393 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3394 } 3395 3396 /* Check if iscol has same processor distribution as mat */ 3397 sameDist[1] = PETSC_FALSE; 3398 PetscCall(ISGetLocalSize(iscol, &n)); 3399 if (!n) { 3400 sameDist[1] = PETSC_TRUE; 3401 } else { 3402 PetscCall(ISGetMinMax(iscol, &i, &j)); 3403 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3404 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3405 } 3406 3407 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3408 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3409 sameRowDist = tsameDist[0]; 3410 } 3411 3412 if (sameRowDist) { 3413 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3414 /* isrow and iscol have same processor distribution as mat */ 3415 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3416 PetscFunctionReturn(PETSC_SUCCESS); 3417 } else { /* sameRowDist */ 3418 /* isrow has same processor distribution as mat */ 3419 if (call == MAT_INITIAL_MATRIX) { 3420 PetscBool sorted; 3421 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3422 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3423 PetscCall(ISGetSize(iscol, &i)); 3424 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3425 3426 PetscCall(ISSorted(iscol_local, &sorted)); 3427 if (sorted) { 3428 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3429 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3430 PetscFunctionReturn(PETSC_SUCCESS); 3431 } 3432 } else { /* call == MAT_REUSE_MATRIX */ 3433 IS iscol_sub; 3434 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3435 if (iscol_sub) { 3436 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3437 PetscFunctionReturn(PETSC_SUCCESS); 3438 } 3439 } 3440 } 3441 } 3442 3443 /* General case: iscol -> iscol_local which has global size of iscol */ 3444 if (call == MAT_REUSE_MATRIX) { 3445 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3446 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3447 } else { 3448 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3449 } 3450 3451 PetscCall(ISGetLocalSize(iscol, &csize)); 3452 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3453 3454 if (call == MAT_INITIAL_MATRIX) { 3455 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3456 PetscCall(ISDestroy(&iscol_local)); 3457 } 3458 PetscFunctionReturn(PETSC_SUCCESS); 3459 } 3460 3461 /*@C 3462 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3463 and "off-diagonal" part of the matrix in CSR format. 3464 3465 Collective 3466 3467 Input Parameters: 3468 + comm - MPI communicator 3469 . A - "diagonal" portion of matrix 3470 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3471 - garray - global index of `B` columns 3472 3473 Output Parameter: 3474 . mat - the matrix, with input `A` as its local diagonal matrix 3475 3476 Level: advanced 3477 3478 Notes: 3479 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3480 3481 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3482 3483 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3484 @*/ 3485 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3486 { 3487 Mat_MPIAIJ *maij; 3488 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3489 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3490 const PetscScalar *oa; 3491 Mat Bnew; 3492 PetscInt m, n, N; 3493 MatType mpi_mat_type; 3494 3495 PetscFunctionBegin; 3496 PetscCall(MatCreate(comm, mat)); 3497 PetscCall(MatGetSize(A, &m, &n)); 3498 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3499 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3500 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3501 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3502 3503 /* Get global columns of mat */ 3504 PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3505 3506 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3507 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
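For example, a `MATSEQAIJCUSPARSE` A is expected to yield a `MATMPIAIJCUSPARSE` *mat.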
*/ 3508 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3509 PetscCall(MatSetType(*mat, mpi_mat_type)); 3510 3511 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3512 maij = (Mat_MPIAIJ *)(*mat)->data; 3513 3514 (*mat)->preallocated = PETSC_TRUE; 3515 3516 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3517 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3518 3519 /* Set A as diagonal portion of *mat */ 3520 maij->A = A; 3521 3522 nz = oi[m]; 3523 for (i = 0; i < nz; i++) { 3524 col = oj[i]; 3525 oj[i] = garray[col]; 3526 } 3527 3528 /* Set Bnew as off-diagonal portion of *mat */ 3529 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3530 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3531 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3532 bnew = (Mat_SeqAIJ *)Bnew->data; 3533 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3534 maij->B = Bnew; 3535 3536 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3537 3538 b->free_a = PETSC_FALSE; 3539 b->free_ij = PETSC_FALSE; 3540 PetscCall(MatDestroy(&B)); 3541 3542 bnew->free_a = PETSC_TRUE; 3543 bnew->free_ij = PETSC_TRUE; 3544 3545 /* condense columns of maij->B */ 3546 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3547 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3548 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3549 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3550 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3551 PetscFunctionReturn(PETSC_SUCCESS); 3552 } 3553 3554 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3555 3556 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3557 { 3558 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3559 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3560 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3561 Mat M, Msub, B = a->B; 3562 MatScalar *aa; 3563 Mat_SeqAIJ *aij; 3564 PetscInt *garray = a->garray, *colsub, Ncols; 3565 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3566 IS iscol_sub, iscmap; 3567 const PetscInt *is_idx, *cmap; 3568 PetscBool allcolumns = PETSC_FALSE; 3569 MPI_Comm comm; 3570 3571 PetscFunctionBegin; 3572 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3573 if (call == MAT_REUSE_MATRIX) { 3574 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3575 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3576 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3577 3578 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3579 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3580 3581 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3582 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3583 3584 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3585 3586 } else { /* call == MAT_INITIAL_MATRIX) */ 3587 PetscBool 
flg; 3588 3589 PetscCall(ISGetLocalSize(iscol, &n)); 3590 PetscCall(ISGetSize(iscol, &Ncols)); 3591 3592 /* (1) iscol -> nonscalable iscol_local */ 3593 /* Check for special case: each processor gets entire matrix columns */ 3594 PetscCall(ISIdentity(iscol_local, &flg)); 3595 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3596 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3597 if (allcolumns) { 3598 iscol_sub = iscol_local; 3599 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3600 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3601 3602 } else { 3603 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3604 PetscInt *idx, *cmap1, k; 3605 PetscCall(PetscMalloc1(Ncols, &idx)); 3606 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3607 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3608 count = 0; 3609 k = 0; 3610 for (i = 0; i < Ncols; i++) { 3611 j = is_idx[i]; 3612 if (j >= cstart && j < cend) { 3613 /* diagonal part of mat */ 3614 idx[count] = j; 3615 cmap1[count++] = i; /* column index in submat */ 3616 } else if (Bn) { 3617 /* off-diagonal part of mat */ 3618 if (j == garray[k]) { 3619 idx[count] = j; 3620 cmap1[count++] = i; /* column index in submat */ 3621 } else if (j > garray[k]) { 3622 while (j > garray[k] && k < Bn - 1) k++; 3623 if (j == garray[k]) { 3624 idx[count] = j; 3625 cmap1[count++] = i; /* column index in submat */ 3626 } 3627 } 3628 } 3629 } 3630 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3631 3632 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3633 PetscCall(ISGetBlockSize(iscol, &cbs)); 3634 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3635 3636 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3637 } 3638 3639 /* (3) Create sequential Msub */ 3640 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3641 } 3642 3643 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3644 aij = (Mat_SeqAIJ *)Msub->data; 3645 ii = aij->i; 3646 PetscCall(ISGetIndices(iscmap, &cmap)); 3647 3648 /* 3649 m - number of local rows 3650 Ncols - number of columns (same on all processors) 3651 rstart - first row in new global matrix generated 3652 */ 3653 PetscCall(MatGetSize(Msub, &m, NULL)); 3654 3655 if (call == MAT_INITIAL_MATRIX) { 3656 /* (4) Create parallel newmat */ 3657 PetscMPIInt rank, size; 3658 PetscInt csize; 3659 3660 PetscCallMPI(MPI_Comm_size(comm, &size)); 3661 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3662 3663 /* 3664 Determine the number of non-zeros in the diagonal and off-diagonal 3665 portions of the matrix in order to do correct preallocation 3666 */ 3667 3668 /* first get start and end of "diagonal" columns */ 3669 PetscCall(ISGetLocalSize(iscol, &csize)); 3670 if (csize == PETSC_DECIDE) { 3671 PetscCall(ISGetSize(isrow, &mglobal)); 3672 if (mglobal == Ncols) { /* square matrix */ 3673 nlocal = m; 3674 } else { 3675 nlocal = Ncols / size + ((Ncols % size) > rank); 3676 } 3677 } else { 3678 nlocal = csize; 3679 } 3680 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3681 rstart = rend - nlocal; 3682 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3683 3684 /* next, 
compute all the lengths */ 3685 jj = aij->j; 3686 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3687 olens = dlens + m; 3688 for (i = 0; i < m; i++) { 3689 jend = ii[i + 1] - ii[i]; 3690 olen = 0; 3691 dlen = 0; 3692 for (j = 0; j < jend; j++) { 3693 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3694 else dlen++; 3695 jj++; 3696 } 3697 olens[i] = olen; 3698 dlens[i] = dlen; 3699 } 3700 3701 PetscCall(ISGetBlockSize(isrow, &bs)); 3702 PetscCall(ISGetBlockSize(iscol, &cbs)); 3703 3704 PetscCall(MatCreate(comm, &M)); 3705 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3706 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3707 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3708 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3709 PetscCall(PetscFree(dlens)); 3710 3711 } else { /* call == MAT_REUSE_MATRIX */ 3712 M = *newmat; 3713 PetscCall(MatGetLocalSize(M, &i, NULL)); 3714 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3715 PetscCall(MatZeroEntries(M)); 3716 /* 3717 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3718 rather than the slower MatSetValues(). 3719 */ 3720 M->was_assembled = PETSC_TRUE; 3721 M->assembled = PETSC_FALSE; 3722 } 3723 3724 /* (5) Set values of Msub to *newmat */ 3725 PetscCall(PetscMalloc1(count, &colsub)); 3726 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3727 3728 jj = aij->j; 3729 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3730 for (i = 0; i < m; i++) { 3731 row = rstart + i; 3732 nz = ii[i + 1] - ii[i]; 3733 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3734 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3735 jj += nz; 3736 aa += nz; 3737 } 3738 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3739 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3740 3741 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3742 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3743 3744 PetscCall(PetscFree(colsub)); 3745 3746 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3747 if (call == MAT_INITIAL_MATRIX) { 3748 *newmat = M; 3749 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3750 PetscCall(MatDestroy(&Msub)); 3751 3752 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3753 PetscCall(ISDestroy(&iscol_sub)); 3754 3755 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3756 PetscCall(ISDestroy(&iscmap)); 3757 3758 if (iscol_local) { 3759 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3760 PetscCall(ISDestroy(&iscol_local)); 3761 } 3762 } 3763 PetscFunctionReturn(PETSC_SUCCESS); 3764 } 3765 3766 /* 3767 Not great since it makes two copies of the submatrix, first an SeqAIJ 3768 in local and then by concatenating the local matrices the end result. 3769 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3770 3771 This requires a sequential iscol with all indices. 
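Because iscol must be a sequential IS holding every requested column index, each rank stores an index set whose length is the global number of selected columns, which is why this path is considered non-scalable.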
3772 */ 3773 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3774 { 3775 PetscMPIInt rank, size; 3776 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3777 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3778 Mat M, Mreuse; 3779 MatScalar *aa, *vwork; 3780 MPI_Comm comm; 3781 Mat_SeqAIJ *aij; 3782 PetscBool colflag, allcolumns = PETSC_FALSE; 3783 3784 PetscFunctionBegin; 3785 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3786 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3787 PetscCallMPI(MPI_Comm_size(comm, &size)); 3788 3789 /* Check for special case: each processor gets entire matrix columns */ 3790 PetscCall(ISIdentity(iscol, &colflag)); 3791 PetscCall(ISGetLocalSize(iscol, &n)); 3792 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3793 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3794 3795 if (call == MAT_REUSE_MATRIX) { 3796 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3797 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3798 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3799 } else { 3800 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3801 } 3802 3803 /* 3804 m - number of local rows 3805 n - number of columns (same on all processors) 3806 rstart - first row in new global matrix generated 3807 */ 3808 PetscCall(MatGetSize(Mreuse, &m, &n)); 3809 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3810 if (call == MAT_INITIAL_MATRIX) { 3811 aij = (Mat_SeqAIJ *)Mreuse->data; 3812 ii = aij->i; 3813 jj = aij->j; 3814 3815 /* 3816 Determine the number of non-zeros in the diagonal and off-diagonal 3817 portions of the matrix in order to do correct preallocation 3818 */ 3819 3820 /* first get start and end of "diagonal" columns */ 3821 if (csize == PETSC_DECIDE) { 3822 PetscCall(ISGetSize(isrow, &mglobal)); 3823 if (mglobal == n) { /* square matrix */ 3824 nlocal = m; 3825 } else { 3826 nlocal = n / size + ((n % size) > rank); 3827 } 3828 } else { 3829 nlocal = csize; 3830 } 3831 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3832 rstart = rend - nlocal; 3833 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3834 3835 /* next, compute all the lengths */ 3836 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3837 olens = dlens + m; 3838 for (i = 0; i < m; i++) { 3839 jend = ii[i + 1] - ii[i]; 3840 olen = 0; 3841 dlen = 0; 3842 for (j = 0; j < jend; j++) { 3843 if (*jj < rstart || *jj >= rend) olen++; 3844 else dlen++; 3845 jj++; 3846 } 3847 olens[i] = olen; 3848 dlens[i] = dlen; 3849 } 3850 PetscCall(MatCreate(comm, &M)); 3851 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3852 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3853 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3854 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3855 PetscCall(PetscFree(dlens)); 3856 } else { 3857 PetscInt ml, nl; 3858 3859 M = *newmat; 3860 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3861 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3862 PetscCall(MatZeroEntries(M)); 3863 /* 3864 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3865 rather than the slower MatSetValues(). 3866 */ 3867 M->was_assembled = PETSC_TRUE; 3868 M->assembled = PETSC_FALSE; 3869 } 3870 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3871 aij = (Mat_SeqAIJ *)Mreuse->data; 3872 ii = aij->i; 3873 jj = aij->j; 3874 3875 /* trigger copy to CPU if needed */ 3876 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3877 for (i = 0; i < m; i++) { 3878 row = rstart + i; 3879 nz = ii[i + 1] - ii[i]; 3880 cwork = jj; 3881 jj = PetscSafePointerPlusOffset(jj, nz); 3882 vwork = aa; 3883 aa = PetscSafePointerPlusOffset(aa, nz); 3884 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3885 } 3886 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3887 3888 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3889 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3890 *newmat = M; 3891 3892 /* save submatrix used in processor for next request */ 3893 if (call == MAT_INITIAL_MATRIX) { 3894 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3895 PetscCall(MatDestroy(&Mreuse)); 3896 } 3897 PetscFunctionReturn(PETSC_SUCCESS); 3898 } 3899 3900 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3901 { 3902 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3903 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3904 const PetscInt *JJ; 3905 PetscBool nooffprocentries; 3906 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3907 3908 PetscFunctionBegin; 3909 PetscCall(PetscLayoutSetUp(B->rmap)); 3910 PetscCall(PetscLayoutSetUp(B->cmap)); 3911 m = B->rmap->n; 3912 cstart = B->cmap->rstart; 3913 cend = B->cmap->rend; 3914 rstart = B->rmap->rstart; 3915 irstart = Ii[0]; 3916 3917 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3918 3919 if (PetscDefined(USE_DEBUG)) { 3920 for (i = 0; i < m; i++) { 3921 nnz = Ii[i + 1] - Ii[i]; 3922 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3923 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3924 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3925 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3926 } 3927 } 3928 3929 for (i = 0; i < m; i++) { 3930 nnz = Ii[i + 1] - Ii[i]; 3931 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3932 nnz_max = PetscMax(nnz_max, nnz); 3933 d = 0; 3934 for (j = 0; j < nnz; j++) { 3935 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3936 } 3937 d_nnz[i] = d; 3938 o_nnz[i] = nnz - d; 3939 } 3940 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3941 PetscCall(PetscFree2(d_nnz, o_nnz)); 3942 3943 for (i = 0; i < m; i++) { 3944 ii = i + rstart; 3945 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3946 } 3947 nooffprocentries = B->nooffprocentries; 3948 B->nooffprocentries = PETSC_TRUE; 3949 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3950 
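/* nooffprocentries was set above, so completing the assembly requires no off-process stash communication */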
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3951 B->nooffprocentries = nooffprocentries; 3952 3953 /* count number of entries below block diagonal */ 3954 PetscCall(PetscFree(Aij->ld)); 3955 PetscCall(PetscCalloc1(m, &ld)); 3956 Aij->ld = ld; 3957 for (i = 0; i < m; i++) { 3958 nnz = Ii[i + 1] - Ii[i]; 3959 j = 0; 3960 while (j < nnz && J[j] < cstart) j++; 3961 ld[i] = j; 3962 if (J) J += nnz; 3963 } 3964 3965 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3966 PetscFunctionReturn(PETSC_SUCCESS); 3967 } 3968 3969 /*@ 3970 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3971 (the default parallel PETSc format). 3972 3973 Collective 3974 3975 Input Parameters: 3976 + B - the matrix 3977 . i - the indices into `j` for the start of each local row (indices start with zero) 3978 . j - the column indices for each local row (indices start with zero) 3979 - v - optional values in the matrix 3980 3981 Level: developer 3982 3983 Notes: 3984 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3985 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3986 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3987 3988 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3989 3990 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3991 3992 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3993 3994 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3995 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3996 3997 The format which is used for the sparse matrix input, is equivalent to a 3998 row-major ordering.. i.e for the following matrix, the input data expected is 3999 as shown 4000 .vb 4001 1 0 0 4002 2 0 3 P0 4003 ------- 4004 4 5 6 P1 4005 4006 Process0 [P0] rows_owned=[0,1] 4007 i = {0,1,3} [size = nrow+1 = 2+1] 4008 j = {0,0,2} [size = 3] 4009 v = {1,2,3} [size = 3] 4010 4011 Process1 [P1] rows_owned=[2] 4012 i = {0,3} [size = nrow+1 = 1+1] 4013 j = {0,1,2} [size = 3] 4014 v = {4,5,6} [size = 3] 4015 .ve 4016 4017 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4018 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4019 @*/ 4020 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4021 { 4022 PetscFunctionBegin; 4023 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4024 PetscFunctionReturn(PETSC_SUCCESS); 4025 } 4026 4027 /*@ 4028 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4029 (the default parallel PETSc format). For good matrix assembly performance 4030 the user should preallocate the matrix storage by setting the parameters 4031 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4032 4033 Collective 4034 4035 Input Parameters: 4036 + B - the matrix 4037 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4038 (same value is used for all local rows)
4039 . d_nnz - array containing the number of nonzeros in the various rows of the
4040 DIAGONAL portion of the local submatrix (possibly different for each row)
4041 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4042 The size of this array is equal to the number of local rows, i.e., 'm'.
4043 For matrices that will be factored, you must leave room for (and set)
4044 the diagonal entry even if it is zero.
4045 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4046 submatrix (same value is used for all local rows).
4047 - o_nnz - array containing the number of nonzeros in the various rows of the
4048 OFF-DIAGONAL portion of the local submatrix (possibly different for
4049 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4050 structure. The size of this array is equal to the number
4051 of local rows, i.e., 'm'.
4052
4053 Example Usage:
4054 Consider the following 8x8 matrix with 34 non-zero values, that is
4055 assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4056 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4057 as follows
4058
4059 .vb
4060 1 2 0 | 0 3 0 | 0 4
4061 Proc0 0 5 6 | 7 0 0 | 8 0
4062 9 0 10 | 11 0 0 | 12 0
4063 -------------------------------------
4064 13 0 14 | 15 16 17 | 0 0
4065 Proc1 0 18 0 | 19 20 21 | 0 0
4066 0 0 0 | 22 23 0 | 24 0
4067 -------------------------------------
4068 Proc2 25 26 27 | 0 0 28 | 29 0
4069 30 0 0 | 31 32 33 | 0 34
4070 .ve
4071
4072 This can be represented as a collection of submatrices as
4073 .vb
4074 A B C
4075 D E F
4076 G H I
4077 .ve
4078
4079 Where the submatrices A,B,C are owned by proc0, D,E,F are
4080 owned by proc1, G,H,I are owned by proc2.
4081
4082 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4083 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4084 The 'M','N' parameters are 8,8, and have the same values on all procs.
4085
4086 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4087 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4088 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4089 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4090 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4091 matrix, and [DF] as another `MATSEQAIJ` matrix.
4092
4093 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4094 allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4095 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4096 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4097 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4098 In this case, the values of `d_nz`, `o_nz` are
4099 .vb
4100 proc0 d_nz = 2, o_nz = 2
4101 proc1 d_nz = 3, o_nz = 2
4102 proc2 d_nz = 1, o_nz = 4
4103 .ve
4104 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4105 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4106 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4107 34 values.
4108
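For example, on each rank the scalar preallocation above could be supplied as in the following sketch (variable names are illustrative, error checking omitted):
.vb
Mat A;
MatCreate(PETSC_COMM_WORLD, &A);
MatSetSizes(A, m, n, 8, 8);
MatSetType(A, MATMPIAIJ);
MatMPIAIJSetPreallocation(A, d_nz, NULL, o_nz, NULL);
.ve
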
4109 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4110 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4111 In the above case the values for `d_nnz`, `o_nnz` are
4112 .vb
4113 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4114 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4115 proc2 d_nnz = [1,1] and o_nnz = [4,4]
4116 .ve
4117 Here the space allocated is the sum of all the above values, i.e., 34, and
4118 hence the preallocation is perfect.
4119
4120 Level: intermediate
4121
4122 Notes:
4123 If the *_nnz parameter is given then the *_nz parameter is ignored.
4124
4125 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4126 storage. The stored row and column indices begin with zero.
4127 See [Sparse Matrices](sec_matsparse) for details.
4128
4129 The parallel matrix is partitioned such that the first m0 rows belong to
4130 process 0, the next m1 rows belong to process 1, the next m2 rows belong
4131 to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4132
4133 The DIAGONAL portion of the local submatrix of a processor can be defined
4134 as the submatrix which is obtained by extracting the part corresponding to
4135 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4136 first row that belongs to the processor, r2 is the last row belonging to
4137 this processor, and c1-c2 is the range of indices of the local part of a
4138 vector suitable for applying the matrix to. This is an mxn matrix. In the
4139 common case of a square matrix, the row and column ranges are the same and
4140 the DIAGONAL part is also square. The remaining portion of the local
4141 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4142
4143 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4144
4145 You can call `MatGetInfo()` to get information on how effective the preallocation was;
4146 for example, the fields mallocs, nz_allocated, nz_used, nz_unneeded.
4147 You can also run with the option `-info` and look for messages with the string
4148 malloc in them to see if additional memory allocation was needed.
4149
4150 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4151 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4152 @*/
4153 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4154 {
4155 PetscFunctionBegin;
4156 PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4157 PetscValidType(B, 1);
4158 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4159 PetscFunctionReturn(PETSC_SUCCESS);
4160 }
4161
4162 /*@
4163 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4164 CSR format.
4165
4166 Collective
4167
4168 Input Parameters:
4169 + comm - MPI communicator
4170 . m - number of local rows (Cannot be `PETSC_DECIDE`)
4171 . n - This value should be the same as the local size used in creating the
4172 x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have it
4173 calculated if `N` is given). For square matrices `n` is almost always `m`.
4174 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4175 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4176 . 
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4177 . j - global column indices 4178 - a - optional matrix values 4179 4180 Output Parameter: 4181 . mat - the matrix 4182 4183 Level: intermediate 4184 4185 Notes: 4186 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4187 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4188 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4189 4190 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4191 4192 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4193 4194 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4195 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4196 4197 The format which is used for the sparse matrix input, is equivalent to a 4198 row-major ordering, i.e., for the following matrix, the input data expected is 4199 as shown 4200 .vb 4201 1 0 0 4202 2 0 3 P0 4203 ------- 4204 4 5 6 P1 4205 4206 Process0 [P0] rows_owned=[0,1] 4207 i = {0,1,3} [size = nrow+1 = 2+1] 4208 j = {0,0,2} [size = 3] 4209 v = {1,2,3} [size = 3] 4210 4211 Process1 [P1] rows_owned=[2] 4212 i = {0,3} [size = nrow+1 = 1+1] 4213 j = {0,1,2} [size = 3] 4214 v = {4,5,6} [size = 3] 4215 .ve 4216 4217 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4218 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4219 @*/ 4220 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4221 { 4222 PetscFunctionBegin; 4223 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4224 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4225 PetscCall(MatCreate(comm, mat)); 4226 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4227 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4228 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4229 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4230 PetscFunctionReturn(PETSC_SUCCESS); 4231 } 4232 4233 /*@ 4234 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4235 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4236 from `MatCreateMPIAIJWithArrays()` 4237 4238 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4239 4240 Collective 4241 4242 Input Parameters: 4243 + mat - the matrix 4244 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4245 . n - This value should be the same as the local size used in creating the 4246 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4247 calculated if N is given) For square matrices n is almost always m. 4248 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4249 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4250 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4251 . J - column indices 4252 - v - matrix values 4253 4254 Level: deprecated 4255 4256 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4257 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4258 @*/ 4259 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4260 { 4261 PetscInt nnz, i; 4262 PetscBool nooffprocentries; 4263 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4264 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4265 PetscScalar *ad, *ao; 4266 PetscInt ldi, Iii, md; 4267 const PetscInt *Adi = Ad->i; 4268 PetscInt *ld = Aij->ld; 4269 4270 PetscFunctionBegin; 4271 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4272 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4273 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4274 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4275 4276 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4277 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4278 4279 for (i = 0; i < m; i++) { 4280 if (PetscDefined(USE_DEBUG)) { 4281 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4282 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4283 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4284 } 4285 } 4286 nnz = Ii[i + 1] - Ii[i]; 4287 Iii = Ii[i]; 4288 ldi = ld[i]; 4289 md = Adi[i + 1] - Adi[i]; 4290 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4291 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4292 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4293 ad += md; 4294 ao += nnz - md; 4295 } 4296 nooffprocentries = mat->nooffprocentries; 4297 mat->nooffprocentries = PETSC_TRUE; 4298 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4299 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4300 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4301 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4302 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4303 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4304 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4305 mat->nooffprocentries = nooffprocentries; 4306 PetscFunctionReturn(PETSC_SUCCESS); 4307 } 4308 4309 /*@ 4310 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4311 4312 Collective 4313 4314 Input Parameters: 4315 + mat - the matrix 4316 - v - matrix values, stored by row 4317 4318 Level: intermediate 4319 4320 Notes: 4321 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4322 4323 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4324 4325 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4326 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4327 @*/ 4328 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4329 { 4330 PetscInt nnz, i, m; 4331 PetscBool nooffprocentries; 4332 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4333 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4334 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4335 PetscScalar *ad, *ao; 4336 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4337 PetscInt ldi, Iii, md; 4338 PetscInt *ld = Aij->ld; 4339 4340 PetscFunctionBegin; 4341 m = mat->rmap->n; 4342 4343 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4344 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4345 Iii = 0; 4346 for (i = 0; i < m; i++) { 4347 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4348 ldi = ld[i]; 4349 md = Adi[i + 1] - Adi[i]; 4350 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4351 ad += md; 4352 if (ao) { 4353 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4354 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4355 ao += nnz - md; 4356 } 4357 Iii += nnz; 4358 } 4359 nooffprocentries = mat->nooffprocentries; 4360 mat->nooffprocentries = PETSC_TRUE; 4361 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4362 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4363 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4364 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4365 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4366 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4367 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4368 mat->nooffprocentries = nooffprocentries; 4369 PetscFunctionReturn(PETSC_SUCCESS); 4370 } 4371 4372 /*@ 4373 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4374 (the default parallel PETSc format). For good matrix assembly performance 4375 the user should preallocate the matrix storage by setting the parameters 4376 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4377 4378 Collective 4379 4380 Input Parameters: 4381 + comm - MPI communicator 4382 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4383 This value should be the same as the local size used in creating the 4384 y vector for the matrix-vector product y = Ax. 4385 . n - This value should be the same as the local size used in creating the 4386 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4387 calculated if N is given) For square matrices n is almost always m. 4388 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4389 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4390 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4391 (same value is used for all local rows) 4392 . d_nnz - array containing the number of nonzeros in the various rows of the 4393 DIAGONAL portion of the local submatrix (possibly different for each row) 4394 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4395 The size of this array is equal to the number of local rows, i.e 'm'. 4396 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4397 submatrix (same value is used for all local rows). 4398 - o_nnz - array containing the number of nonzeros in the various rows of the 4399 OFF-DIAGONAL portion of the local submatrix (possibly different for 4400 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4401 structure. The size of this array is equal to the number 4402 of local rows, i.e., 'm'. 4403 4404 Output Parameter: 4405 . A - the matrix 4406 4407 Options Database Keys: 4408 + -mat_no_inode - Do not use inodes 4409 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4410 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4411 See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4412 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4413 4414 Level: intermediate 4415 4416 Notes: 4417 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4418 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4419 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4420 4421 If the *_nnz parameter is given then the *_nz parameter is ignored 4422 4423 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4424 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4425 storage requirements for this matrix. 4426 4427 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4428 processor then it must be used on all processors that share the object for 4429 that argument. 4430 4431 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4432 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4433 4434 The user MUST specify either the local or global matrix dimensions 4435 (possibly both). 4436 4437 The parallel matrix is partitioned across processors such that the 4438 first `m0` rows belong to process 0, the next `m1` rows belong to 4439 process 1, the next `m2` rows belong to process 2, etc., where 4440 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4441 values corresponding to an [m x N] submatrix. 4442 4443 The columns are logically partitioned with the first n0 columns belonging 4444 to the 0th partition, the next n1 columns belonging to the next 4445 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4446 4447 The DIAGONAL portion of the local submatrix on any given processor 4448 is the submatrix corresponding to the rows and columns m,n 4449 owned by that processor, i.e., the diagonal matrix on 4450 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4451 etc. The remaining portion of the local submatrix [m x (N-n)] 4452 constitutes the OFF-DIAGONAL portion. The example below better 4453 illustrates this concept. The two matrices, the DIAGONAL portion and 4454 the OFF-DIAGONAL portion, are each stored as `MATSEQAIJ` matrices.
4455 4456 For a square global matrix we define each processor's diagonal portion 4457 to be its local rows and the corresponding columns (a square submatrix); 4458 each processor's off-diagonal portion encompasses the remainder of the 4459 local matrix (a rectangular submatrix). 4460 4461 If `o_nnz`, `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4462 4463 When calling this routine with a single process communicator, a matrix of 4464 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4465 type of communicator, use the construction mechanism 4466 .vb 4467 MatCreate(..., &A); 4468 MatSetType(A, MATMPIAIJ); 4469 MatSetSizes(A, m, n, M, N); 4470 MatMPIAIJSetPreallocation(A, ...); 4471 .ve 4472 4473 By default, this format uses inodes (identical nodes) when possible. 4474 We search for consecutive rows with the same nonzero structure, thereby 4475 reusing matrix information to achieve increased efficiency. 4476 4477 Example Usage: 4478 Consider the following 8x8 matrix with 34 non-zero values, that is 4479 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4480 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4481 as follows 4482 4483 .vb 4484 1 2 0 | 0 3 0 | 0 4 4485 Proc0 0 5 6 | 7 0 0 | 8 0 4486 9 0 10 | 11 0 0 | 12 0 4487 ------------------------------------- 4488 13 0 14 | 15 16 17 | 0 0 4489 Proc1 0 18 0 | 19 20 21 | 0 0 4490 0 0 0 | 22 23 0 | 24 0 4491 ------------------------------------- 4492 Proc2 25 26 27 | 0 0 28 | 29 0 4493 30 0 0 | 31 32 33 | 0 34 4494 .ve 4495 4496 This can be represented as a collection of submatrices as 4497 4498 .vb 4499 A B C 4500 D E F 4501 G H I 4502 .ve 4503 4504 Where the submatrices A,B,C are owned by proc0, D,E,F are 4505 owned by proc1, G,H,I are owned by proc2. 4506 4507 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4508 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4509 The 'M','N' parameters are 8,8, and have the same values on all procs. 4510 4511 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4512 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4513 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4514 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4515 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4516 matrix, and [DF] as another `MATSEQAIJ` matrix. 4517 4518 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4519 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4520 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4521 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4522 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4523 In this case, the values of `d_nz`,`o_nz` are 4524 .vb 4525 proc0 d_nz = 2, o_nz = 2 4526 proc1 d_nz = 3, o_nz = 2 4527 proc2 d_nz = 1, o_nz = 4 4528 .ve 4529 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4530 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4531 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4532 34 values. 4533 4534 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4535 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4536 In the above case the values for d_nnz,o_nnz are 4537 .vb 4538 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4539 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4540 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4541 .ve 4542 Here the space allocated is sum of all the above values i.e 34, and 4543 hence pre-allocation is perfect. 4544 4545 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4546 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4547 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4548 @*/ 4549 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4550 { 4551 PetscMPIInt size; 4552 4553 PetscFunctionBegin; 4554 PetscCall(MatCreate(comm, A)); 4555 PetscCall(MatSetSizes(*A, m, n, M, N)); 4556 PetscCallMPI(MPI_Comm_size(comm, &size)); 4557 if (size > 1) { 4558 PetscCall(MatSetType(*A, MATMPIAIJ)); 4559 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4560 } else { 4561 PetscCall(MatSetType(*A, MATSEQAIJ)); 4562 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4563 } 4564 PetscFunctionReturn(PETSC_SUCCESS); 4565 } 4566 4567 /*MC 4568 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4569 4570 Synopsis: 4571 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4572 4573 Not Collective 4574 4575 Input Parameter: 4576 . A - the `MATMPIAIJ` matrix 4577 4578 Output Parameters: 4579 + Ad - the diagonal portion of the matrix 4580 . Ao - the off-diagonal portion of the matrix 4581 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4582 - ierr - error code 4583 4584 Level: advanced 4585 4586 Note: 4587 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4588 4589 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4590 M*/ 4591 4592 /*MC 4593 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4594 4595 Synopsis: 4596 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4597 4598 Not Collective 4599 4600 Input Parameters: 4601 + A - the `MATMPIAIJ` matrix 4602 . Ad - the diagonal portion of the matrix 4603 . Ao - the off-diagonal portion of the matrix 4604 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4605 - ierr - error code 4606 4607 Level: advanced 4608 4609 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4610 M*/ 4611 4612 /*@C 4613 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4614 4615 Not Collective 4616 4617 Input Parameter: 4618 . A - The `MATMPIAIJ` matrix 4619 4620 Output Parameters: 4621 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4622 . 
Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4623 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4624 4625 Level: intermediate 4626 4627 Note: 4628 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4629 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4630 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4631 local column numbers to global column numbers in the original matrix. 4632 4633 Fortran Notes: 4634 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4635 4636 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4637 @*/ 4638 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4639 { 4640 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4641 PetscBool flg; 4642 4643 PetscFunctionBegin; 4644 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4645 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4646 if (Ad) *Ad = a->A; 4647 if (Ao) *Ao = a->B; 4648 if (colmap) *colmap = a->garray; 4649 PetscFunctionReturn(PETSC_SUCCESS); 4650 } 4651 4652 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4653 { 4654 PetscInt m, N, i, rstart, nnz, Ii; 4655 PetscInt *indx; 4656 PetscScalar *values; 4657 MatType rootType; 4658 4659 PetscFunctionBegin; 4660 PetscCall(MatGetSize(inmat, &m, &N)); 4661 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4662 PetscInt *dnz, *onz, sum, bs, cbs; 4663 4664 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4665 /* Check sum(n) = N */ 4666 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4667 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4668 4669 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4670 rstart -= m; 4671 4672 MatPreallocateBegin(comm, m, n, dnz, onz); 4673 for (i = 0; i < m; i++) { 4674 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4675 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4676 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4677 } 4678 4679 PetscCall(MatCreate(comm, outmat)); 4680 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4681 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4682 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4683 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4684 PetscCall(MatSetType(*outmat, rootType)); 4685 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4686 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4687 MatPreallocateEnd(dnz, onz); 4688 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4689 } 4690 4691 /* numeric phase */ 4692 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4693 for (i = 0; i < m; i++) { 4694 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4695 Ii = i + rstart; 4696 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4697 
PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4698 } 4699 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4700 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4701 PetscFunctionReturn(PETSC_SUCCESS); 4702 } 4703 4704 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4705 { 4706 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4707 4708 PetscFunctionBegin; 4709 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4710 PetscCall(PetscFree(merge->id_r)); 4711 PetscCall(PetscFree(merge->len_s)); 4712 PetscCall(PetscFree(merge->len_r)); 4713 PetscCall(PetscFree(merge->bi)); 4714 PetscCall(PetscFree(merge->bj)); 4715 PetscCall(PetscFree(merge->buf_ri[0])); 4716 PetscCall(PetscFree(merge->buf_ri)); 4717 PetscCall(PetscFree(merge->buf_rj[0])); 4718 PetscCall(PetscFree(merge->buf_rj)); 4719 PetscCall(PetscFree(merge->coi)); 4720 PetscCall(PetscFree(merge->coj)); 4721 PetscCall(PetscFree(merge->owners_co)); 4722 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4723 PetscCall(PetscFree(merge)); 4724 PetscFunctionReturn(PETSC_SUCCESS); 4725 } 4726 4727 #include <../src/mat/utils/freespace.h> 4728 #include <petscbt.h> 4729 4730 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4731 { 4732 MPI_Comm comm; 4733 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4734 PetscMPIInt size, rank, taga, *len_s; 4735 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4736 PetscMPIInt proc, k; 4737 PetscInt **buf_ri, **buf_rj; 4738 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4739 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4740 MPI_Request *s_waits, *r_waits; 4741 MPI_Status *status; 4742 const MatScalar *aa, *a_a; 4743 MatScalar **abuf_r, *ba_i; 4744 Mat_Merge_SeqsToMPI *merge; 4745 PetscContainer container; 4746 4747 PetscFunctionBegin; 4748 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4749 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4750 4751 PetscCallMPI(MPI_Comm_size(comm, &size)); 4752 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4753 4754 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4755 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4756 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4757 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4758 aa = a_a; 4759 4760 bi = merge->bi; 4761 bj = merge->bj; 4762 buf_ri = merge->buf_ri; 4763 buf_rj = merge->buf_rj; 4764 4765 PetscCall(PetscMalloc1(size, &status)); 4766 owners = merge->rowmap->range; 4767 len_s = merge->len_s; 4768 4769 /* send and recv matrix values */ 4770 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4771 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4772 4773 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4774 for (proc = 0, k = 0; proc < size; proc++) { 4775 if (!len_s[proc]) continue; 4776 i = owners[proc]; 4777 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4778 k++; 4779 } 4780 4781 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4782 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4783 PetscCall(PetscFree(status)); 4784 4785 PetscCall(PetscFree(s_waits)); 4786 PetscCall(PetscFree(r_waits)); 4787 4788 /* insert mat values of mpimat */ 4789 PetscCall(PetscMalloc1(N, &ba_i)); 4790 
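  /*
     ba_i is a per-row work buffer: for each local row of mpimat the contribution of this rank's
     seqmat and the contributions from every received message are accumulated into it before the
     row is inserted once with MatSetValues(). The three arrays allocated next act as cursors into
     each received i-structure: its start, the next row number it carries, and the offsets of that
     row's column data.
  */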
PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4791 4792 for (k = 0; k < merge->nrecv; k++) { 4793 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4794 nrows = *buf_ri_k[k]; 4795 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4796 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4797 } 4798 4799 /* set values of ba */ 4800 m = merge->rowmap->n; 4801 for (i = 0; i < m; i++) { 4802 arow = owners[rank] + i; 4803 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4804 bnzi = bi[i + 1] - bi[i]; 4805 PetscCall(PetscArrayzero(ba_i, bnzi)); 4806 4807 /* add local non-zero vals of this proc's seqmat into ba */ 4808 anzi = ai[arow + 1] - ai[arow]; 4809 aj = a->j + ai[arow]; 4810 aa = a_a + ai[arow]; 4811 nextaj = 0; 4812 for (j = 0; nextaj < anzi; j++) { 4813 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4814 ba_i[j] += aa[nextaj++]; 4815 } 4816 } 4817 4818 /* add received vals into ba */ 4819 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4820 /* i-th row */ 4821 if (i == *nextrow[k]) { 4822 anzi = *(nextai[k] + 1) - *nextai[k]; 4823 aj = buf_rj[k] + *nextai[k]; 4824 aa = abuf_r[k] + *nextai[k]; 4825 nextaj = 0; 4826 for (j = 0; nextaj < anzi; j++) { 4827 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4828 ba_i[j] += aa[nextaj++]; 4829 } 4830 } 4831 nextrow[k]++; 4832 nextai[k]++; 4833 } 4834 } 4835 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4836 } 4837 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4838 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4839 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4840 4841 PetscCall(PetscFree(abuf_r[0])); 4842 PetscCall(PetscFree(abuf_r)); 4843 PetscCall(PetscFree(ba_i)); 4844 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4845 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4846 PetscFunctionReturn(PETSC_SUCCESS); 4847 } 4848 4849 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4850 { 4851 Mat B_mpi; 4852 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4853 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4854 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4855 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4856 PetscInt len, *dnz, *onz, bs, cbs; 4857 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4858 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4859 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4860 MPI_Status *status; 4861 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4862 PetscBT lnkbt; 4863 Mat_Merge_SeqsToMPI *merge; 4864 PetscContainer container; 4865 4866 PetscFunctionBegin; 4867 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4868 4869 /* make sure it is a PETSc comm */ 4870 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4871 PetscCallMPI(MPI_Comm_size(comm, &size)); 4872 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4873 4874 PetscCall(PetscNew(&merge)); 4875 PetscCall(PetscMalloc1(size, &status)); 4876 4877 /* determine row ownership */ 4878 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4879 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4880 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4881 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4882 PetscCall(PetscLayoutSetUp(merge->rowmap)); 
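  /*
     Outline of the symbolic phase: each rank holds a full-size seqmat but owns only the block of
     rows given by merge->rowmap. Every rank sends the i- and j-structure of the rows it does not
     own to the owning rank; each owner then merges its own rows with the received ones through a
     sorted linked list to obtain the union nonzero pattern used to preallocate B_mpi.
  */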
4883 PetscCall(PetscMalloc1(size, &len_si)); 4884 PetscCall(PetscMalloc1(size, &merge->len_s)); 4885 4886 m = merge->rowmap->n; 4887 owners = merge->rowmap->range; 4888 4889 /* determine the number of messages to send, their lengths */ 4890 len_s = merge->len_s; 4891 4892 len = 0; /* length of buf_si[] */ 4893 merge->nsend = 0; 4894 for (PetscMPIInt proc = 0; proc < size; proc++) { 4895 len_si[proc] = 0; 4896 if (proc == rank) { 4897 len_s[proc] = 0; 4898 } else { 4899 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4900 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4901 } 4902 if (len_s[proc]) { 4903 merge->nsend++; 4904 nrows = 0; 4905 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4906 if (ai[i + 1] > ai[i]) nrows++; 4907 } 4908 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4909 len += len_si[proc]; 4910 } 4911 } 4912 4913 /* determine the number and length of messages to receive for ij-structure */ 4914 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4915 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4916 4917 /* post the Irecv of j-structure */ 4918 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4919 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4920 4921 /* post the Isend of j-structure */ 4922 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4923 4924 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4925 if (!len_s[proc]) continue; 4926 i = owners[proc]; 4927 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4928 k++; 4929 } 4930 4931 /* receives and sends of j-structure are complete */ 4932 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4933 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4934 4935 /* send and recv i-structure */ 4936 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4937 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4938 4939 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4940 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4941 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4942 if (!len_s[proc]) continue; 4943 /* form outgoing message for i-structure: 4944 buf_si[0]: nrows to be sent 4945 [1:nrows]: row index (global) 4946 [nrows+1:2*nrows+1]: i-structure index 4947 */ 4948 nrows = len_si[proc] / 2 - 1; 4949 buf_si_i = buf_si + nrows + 1; 4950 buf_si[0] = nrows; 4951 buf_si_i[0] = 0; 4952 nrows = 0; 4953 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4954 anzi = ai[i + 1] - ai[i]; 4955 if (anzi) { 4956 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4957 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4958 nrows++; 4959 } 4960 } 4961 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4962 k++; 4963 buf_si += len_si[proc]; 4964 } 4965 4966 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4967 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4968 4969 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4970 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], 
merge->len_r[i], merge->id_r[i])); 4971 4972 PetscCall(PetscFree(len_si)); 4973 PetscCall(PetscFree(len_ri)); 4974 PetscCall(PetscFree(rj_waits)); 4975 PetscCall(PetscFree2(si_waits, sj_waits)); 4976 PetscCall(PetscFree(ri_waits)); 4977 PetscCall(PetscFree(buf_s)); 4978 PetscCall(PetscFree(status)); 4979 4980 /* compute a local seq matrix in each processor */ 4981 /* allocate bi array and free space for accumulating nonzero column info */ 4982 PetscCall(PetscMalloc1(m + 1, &bi)); 4983 bi[0] = 0; 4984 4985 /* create and initialize a linked list */ 4986 nlnk = N + 1; 4987 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4988 4989 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4990 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4991 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4992 4993 current_space = free_space; 4994 4995 /* determine symbolic info for each local row */ 4996 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4997 4998 for (k = 0; k < merge->nrecv; k++) { 4999 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5000 nrows = *buf_ri_k[k]; 5001 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5002 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5003 } 5004 5005 MatPreallocateBegin(comm, m, n, dnz, onz); 5006 len = 0; 5007 for (i = 0; i < m; i++) { 5008 bnzi = 0; 5009 /* add local non-zero cols of this proc's seqmat into lnk */ 5010 arow = owners[rank] + i; 5011 anzi = ai[arow + 1] - ai[arow]; 5012 aj = a->j + ai[arow]; 5013 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5014 bnzi += nlnk; 5015 /* add received col data into lnk */ 5016 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5017 if (i == *nextrow[k]) { /* i-th row */ 5018 anzi = *(nextai[k] + 1) - *nextai[k]; 5019 aj = buf_rj[k] + *nextai[k]; 5020 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5021 bnzi += nlnk; 5022 nextrow[k]++; 5023 nextai[k]++; 5024 } 5025 } 5026 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5027 5028 /* if free space is not available, make more free space */ 5029 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5030 /* copy data into free space, then initialize lnk */ 5031 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5032 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5033 5034 current_space->array += bnzi; 5035 current_space->local_used += bnzi; 5036 current_space->local_remaining -= bnzi; 5037 5038 bi[i + 1] = bi[i] + bnzi; 5039 } 5040 5041 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5042 5043 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5044 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5045 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5046 5047 /* create symbolic parallel matrix B_mpi */ 5048 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5049 PetscCall(MatCreate(comm, &B_mpi)); 5050 if (n == PETSC_DECIDE) { 5051 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5052 } else { 5053 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5054 } 5055 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5056 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5057 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5058 MatPreallocateEnd(dnz, onz); 5059 PetscCall(MatSetOption(B_mpi, 
MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5060 5061 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5062 B_mpi->assembled = PETSC_FALSE; 5063 merge->bi = bi; 5064 merge->bj = bj; 5065 merge->buf_ri = buf_ri; 5066 merge->buf_rj = buf_rj; 5067 merge->coi = NULL; 5068 merge->coj = NULL; 5069 merge->owners_co = NULL; 5070 5071 PetscCall(PetscCommDestroy(&comm)); 5072 5073 /* attach the supporting struct to B_mpi for reuse */ 5074 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5075 PetscCall(PetscContainerSetPointer(container, merge)); 5076 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5077 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5078 PetscCall(PetscContainerDestroy(&container)); 5079 *mpimat = B_mpi; 5080 5081 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5082 PetscFunctionReturn(PETSC_SUCCESS); 5083 } 5084 5085 /*@ 5086 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5087 matrices from each processor 5088 5089 Collective 5090 5091 Input Parameters: 5092 + comm - the communicators the parallel matrix will live on 5093 . seqmat - the input sequential matrices 5094 . m - number of local rows (or `PETSC_DECIDE`) 5095 . n - number of local columns (or `PETSC_DECIDE`) 5096 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5097 5098 Output Parameter: 5099 . mpimat - the parallel matrix generated 5100 5101 Level: advanced 5102 5103 Note: 5104 The dimensions of the sequential matrix in each processor MUST be the same. 5105 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5106 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5107 5108 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5109 @*/ 5110 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5111 { 5112 PetscMPIInt size; 5113 5114 PetscFunctionBegin; 5115 PetscCallMPI(MPI_Comm_size(comm, &size)); 5116 if (size == 1) { 5117 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5118 if (scall == MAT_INITIAL_MATRIX) { 5119 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5120 } else { 5121 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5122 } 5123 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5124 PetscFunctionReturn(PETSC_SUCCESS); 5125 } 5126 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5127 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5128 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5129 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5130 PetscFunctionReturn(PETSC_SUCCESS); 5131 } 5132 5133 /*@ 5134 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5135 5136 Not Collective 5137 5138 Input Parameter: 5139 . A - the matrix 5140 5141 Output Parameter: 5142 . A_loc - the local sequential matrix generated 5143 5144 Level: developer 5145 5146 Notes: 5147 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5148 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5149 `n` is the global column count obtained with `MatGetSize()` 5150 5151 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5152 5153 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5154 5155 Destroy the matrix with `MatDestroy()` 5156 5157 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5158 @*/ 5159 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5160 { 5161 PetscBool mpi; 5162 5163 PetscFunctionBegin; 5164 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5165 if (mpi) { 5166 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5167 } else { 5168 *A_loc = A; 5169 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5170 } 5171 PetscFunctionReturn(PETSC_SUCCESS); 5172 } 5173 5174 /*@ 5175 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5176 5177 Not Collective 5178 5179 Input Parameters: 5180 + A - the matrix 5181 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5182 5183 Output Parameter: 5184 . A_loc - the local sequential matrix generated 5185 5186 Level: developer 5187 5188 Notes: 5189 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5190 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5191 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5192 5193 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5194 5195 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5196 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5197 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5198 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
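   Example Usage:
   A minimal sketch (the names `A` and `Aloc` are illustrative); the local matrix is created once, refreshed after the
   numerical values of `A` change, and destroyed when no longer needed
.vb
   Mat Aloc;

   MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc);
   /* ... use Aloc, for example with a sequential solver or MatView() ... */
   MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &Aloc); /* copy the current values of A again */
   MatDestroy(&Aloc);
.ve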
5199 5200 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5201 @*/ 5202 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5203 { 5204 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5205 Mat_SeqAIJ *mat, *a, *b; 5206 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5207 const PetscScalar *aa, *ba, *aav, *bav; 5208 PetscScalar *ca, *cam; 5209 PetscMPIInt size; 5210 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5211 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5212 PetscBool match; 5213 5214 PetscFunctionBegin; 5215 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5216 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5217 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5218 if (size == 1) { 5219 if (scall == MAT_INITIAL_MATRIX) { 5220 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5221 *A_loc = mpimat->A; 5222 } else if (scall == MAT_REUSE_MATRIX) { 5223 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5224 } 5225 PetscFunctionReturn(PETSC_SUCCESS); 5226 } 5227 5228 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5229 a = (Mat_SeqAIJ *)mpimat->A->data; 5230 b = (Mat_SeqAIJ *)mpimat->B->data; 5231 ai = a->i; 5232 aj = a->j; 5233 bi = b->i; 5234 bj = b->j; 5235 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5236 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5237 aa = aav; 5238 ba = bav; 5239 if (scall == MAT_INITIAL_MATRIX) { 5240 PetscCall(PetscMalloc1(1 + am, &ci)); 5241 ci[0] = 0; 5242 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5243 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5244 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5245 k = 0; 5246 for (i = 0; i < am; i++) { 5247 ncols_o = bi[i + 1] - bi[i]; 5248 ncols_d = ai[i + 1] - ai[i]; 5249 /* off-diagonal portion of A */ 5250 for (jo = 0; jo < ncols_o; jo++) { 5251 col = cmap[*bj]; 5252 if (col >= cstart) break; 5253 cj[k] = col; 5254 bj++; 5255 ca[k++] = *ba++; 5256 } 5257 /* diagonal portion of A */ 5258 for (j = 0; j < ncols_d; j++) { 5259 cj[k] = cstart + *aj++; 5260 ca[k++] = *aa++; 5261 } 5262 /* off-diagonal portion of A */ 5263 for (j = jo; j < ncols_o; j++) { 5264 cj[k] = cmap[*bj++]; 5265 ca[k++] = *ba++; 5266 } 5267 } 5268 /* put together the new matrix */ 5269 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5270 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5271 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5272 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5273 mat->free_a = PETSC_TRUE; 5274 mat->free_ij = PETSC_TRUE; 5275 mat->nonew = 0; 5276 } else if (scall == MAT_REUSE_MATRIX) { 5277 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5278 ci = mat->i; 5279 cj = mat->j; 5280 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5281 for (i = 0; i < am; i++) { 5282 /* off-diagonal portion of A */ 5283 ncols_o = bi[i + 1] - bi[i]; 5284 for (jo = 0; jo < ncols_o; jo++) { 5285 col = cmap[*bj]; 5286 if (col >= cstart) break; 5287 *cam++ = *ba++; 5288 bj++; 5289 } 5290 /* diagonal portion of A */ 5291 ncols_d = ai[i + 1] - ai[i]; 5292 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5293 /* off-diagonal portion of A */ 5294 for (j = jo; j < ncols_o; j++) { 5295 *cam++ = *ba++; 5296 bj++; 5297 } 5298 } 5299 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5300 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5301 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5302 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5303 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5304 PetscFunctionReturn(PETSC_SUCCESS); 5305 } 5306 5307 /*@ 5308 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5309 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5310 5311 Not Collective 5312 5313 Input Parameters: 5314 + A - the matrix 5315 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5316 5317 Output Parameters: 5318 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5319 - A_loc - the local sequential matrix generated 5320 5321 Level: developer 5322 5323 Note: 5324 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5325 part, then those associated with the off-diagonal part (in its local ordering) 5326 5327 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5328 @*/ 5329 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5330 { 5331 Mat Ao, Ad; 5332 const PetscInt *cmap; 5333 PetscMPIInt size; 5334 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5335 5336 PetscFunctionBegin; 5337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5338 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5339 if (size == 1) { 5340 if (scall == MAT_INITIAL_MATRIX) { 5341 PetscCall(PetscObjectReference((PetscObject)Ad)); 5342 *A_loc = Ad; 5343 } else if (scall == MAT_REUSE_MATRIX) { 5344 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5345 } 5346 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5347 PetscFunctionReturn(PETSC_SUCCESS); 5348 } 5349 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5350 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5351 if (f) { 5352 PetscCall((*f)(A, scall, glob, A_loc)); 5353 } else { 5354 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5355 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5356 Mat_SeqAIJ *c; 5357 PetscInt *ai = a->i, *aj = a->j; 5358 PetscInt *bi = b->i, *bj = b->j; 5359 PetscInt *ci, *cj; 5360 const PetscScalar *aa, *ba; 5361 PetscScalar *ca; 5362 PetscInt i, j, am, dn, on; 5363 5364 
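    /* Default path (no type-specific MatMPIAIJGetLocalMatMerge_C implementation is composed on A):
       build the merged rows directly, with the diagonal-block columns first (local indices [0,dn))
       followed by the off-diagonal-block columns shifted by dn, as described in the Note above. */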
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5365 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5366 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5367 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5368 if (scall == MAT_INITIAL_MATRIX) { 5369 PetscInt k; 5370 PetscCall(PetscMalloc1(1 + am, &ci)); 5371 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5372 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5373 ci[0] = 0; 5374 for (i = 0, k = 0; i < am; i++) { 5375 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5376 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5377 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5378 /* diagonal portion of A */ 5379 for (j = 0; j < ncols_d; j++, k++) { 5380 cj[k] = *aj++; 5381 ca[k] = *aa++; 5382 } 5383 /* off-diagonal portion of A */ 5384 for (j = 0; j < ncols_o; j++, k++) { 5385 cj[k] = dn + *bj++; 5386 ca[k] = *ba++; 5387 } 5388 } 5389 /* put together the new matrix */ 5390 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5391 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5392 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5393 c = (Mat_SeqAIJ *)(*A_loc)->data; 5394 c->free_a = PETSC_TRUE; 5395 c->free_ij = PETSC_TRUE; 5396 c->nonew = 0; 5397 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5398 } else if (scall == MAT_REUSE_MATRIX) { 5399 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5400 for (i = 0; i < am; i++) { 5401 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5402 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5403 /* diagonal portion of A */ 5404 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5405 /* off-diagonal portion of A */ 5406 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5407 } 5408 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5409 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5410 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5411 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5412 if (glob) { 5413 PetscInt cst, *gidx; 5414 5415 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5416 PetscCall(PetscMalloc1(dn + on, &gidx)); 5417 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5418 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5419 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5420 } 5421 } 5422 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5423 PetscFunctionReturn(PETSC_SUCCESS); 5424 } 5425 5426 /*@C 5427 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5428 5429 Not Collective 5430 5431 Input Parameters: 5432 + A - the matrix 5433 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5434 . row - index set of rows to extract (or `NULL`) 5435 - col - index set of columns to extract (or `NULL`) 5436 5437 Output Parameter: 5438 . 
A_loc - the local sequential matrix generated 5439 5440 Level: developer 5441 5442 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5443 @*/ 5444 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5445 { 5446 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5447 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5448 IS isrowa, iscola; 5449 Mat *aloc; 5450 PetscBool match; 5451 5452 PetscFunctionBegin; 5453 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5454 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5455 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5456 if (!row) { 5457 start = A->rmap->rstart; 5458 end = A->rmap->rend; 5459 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5460 } else { 5461 isrowa = *row; 5462 } 5463 if (!col) { 5464 start = A->cmap->rstart; 5465 cmap = a->garray; 5466 nzA = a->A->cmap->n; 5467 nzB = a->B->cmap->n; 5468 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5469 ncols = 0; 5470 for (i = 0; i < nzB; i++) { 5471 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5472 else break; 5473 } 5474 imark = i; 5475 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5476 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5477 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5478 } else { 5479 iscola = *col; 5480 } 5481 if (scall != MAT_INITIAL_MATRIX) { 5482 PetscCall(PetscMalloc1(1, &aloc)); 5483 aloc[0] = *A_loc; 5484 } 5485 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5486 if (!col) { /* attach global id of condensed columns */ 5487 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5488 } 5489 *A_loc = aloc[0]; 5490 PetscCall(PetscFree(aloc)); 5491 if (!row) PetscCall(ISDestroy(&isrowa)); 5492 if (!col) PetscCall(ISDestroy(&iscola)); 5493 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5494 PetscFunctionReturn(PETSC_SUCCESS); 5495 } 5496 5497 /* 5498 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5499 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5500 * on a global size. 
5501 * */ 5502 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5503 { 5504 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5505 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5506 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5507 PetscMPIInt owner; 5508 PetscSFNode *iremote, *oiremote; 5509 const PetscInt *lrowindices; 5510 PetscSF sf, osf; 5511 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5512 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5513 MPI_Comm comm; 5514 ISLocalToGlobalMapping mapping; 5515 const PetscScalar *pd_a, *po_a; 5516 5517 PetscFunctionBegin; 5518 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5519 /* plocalsize is the number of roots 5520 * nrows is the number of leaves 5521 * */ 5522 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5523 PetscCall(ISGetLocalSize(rows, &nrows)); 5524 PetscCall(PetscCalloc1(nrows, &iremote)); 5525 PetscCall(ISGetIndices(rows, &lrowindices)); 5526 for (i = 0; i < nrows; i++) { 5527 /* Find a remote index and an owner for a row 5528 * The row could be local or remote 5529 * */ 5530 owner = 0; 5531 lidx = 0; 5532 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5533 iremote[i].index = lidx; 5534 iremote[i].rank = owner; 5535 } 5536 /* Create SF to communicate how many nonzero columns for each row */ 5537 PetscCall(PetscSFCreate(comm, &sf)); 5538 /* SF will figure out the number of nonzero columns for each row, and their 5539 * offsets 5540 * */ 5541 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5542 PetscCall(PetscSFSetFromOptions(sf)); 5543 PetscCall(PetscSFSetUp(sf)); 5544 5545 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5546 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5547 PetscCall(PetscCalloc1(nrows, &pnnz)); 5548 roffsets[0] = 0; 5549 roffsets[1] = 0; 5550 for (i = 0; i < plocalsize; i++) { 5551 /* diagonal */ 5552 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5553 /* off-diagonal */ 5554 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5555 /* compute offsets so that we relative location for each row */ 5556 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5557 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5558 } 5559 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5560 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5561 /* 'r' means root, and 'l' means leaf */ 5562 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5563 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5564 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5565 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5566 PetscCall(PetscSFDestroy(&sf)); 5567 PetscCall(PetscFree(roffsets)); 5568 PetscCall(PetscFree(nrcols)); 5569 dntotalcols = 0; 5570 ontotalcols = 0; 5571 ncol = 0; 5572 for (i = 0; i < nrows; i++) { 5573 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5574 ncol = PetscMax(pnnz[i], ncol); 5575 /* diagonal */ 5576 dntotalcols += nlcols[i * 2 + 0]; 5577 /* off-diagonal */ 5578 ontotalcols += nlcols[i * 2 + 1]; 5579 } 5580 /* We do not need to figure the right number of columns 5581 * since all the calculations will be done by going through the raw data 5582 * */ 5583 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5584 PetscCall(MatSetUp(*P_oth)); 5585 
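  /*
     Two star forests are built next: sf is rooted at the entries of P's diagonal block and osf at
     the entries of its off-diagonal block, with leaves interleaved row by row into P_oth.
     Broadcasting over them fills the values and column indices of P_oth; P's own column-index
     arrays are globalized only temporarily for the communication and restored afterwards, in
     keeping with the memory-scalable design described above.
  */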
PetscCall(PetscFree(pnnz)); 5586 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5587 /* diagonal */ 5588 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5589 /* off-diagonal */ 5590 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5591 /* diagonal */ 5592 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5593 /* off-diagonal */ 5594 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5595 dntotalcols = 0; 5596 ontotalcols = 0; 5597 ntotalcols = 0; 5598 for (i = 0; i < nrows; i++) { 5599 owner = 0; 5600 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5601 /* Set iremote for diag matrix */ 5602 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5603 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5604 iremote[dntotalcols].rank = owner; 5605 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5606 ilocal[dntotalcols++] = ntotalcols++; 5607 } 5608 /* off-diagonal */ 5609 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5610 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5611 oiremote[ontotalcols].rank = owner; 5612 oilocal[ontotalcols++] = ntotalcols++; 5613 } 5614 } 5615 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5616 PetscCall(PetscFree(loffsets)); 5617 PetscCall(PetscFree(nlcols)); 5618 PetscCall(PetscSFCreate(comm, &sf)); 5619 /* P serves as roots and P_oth is leaves 5620 * Diag matrix 5621 * */ 5622 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5623 PetscCall(PetscSFSetFromOptions(sf)); 5624 PetscCall(PetscSFSetUp(sf)); 5625 5626 PetscCall(PetscSFCreate(comm, &osf)); 5627 /* off-diagonal */ 5628 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5629 PetscCall(PetscSFSetFromOptions(osf)); 5630 PetscCall(PetscSFSetUp(osf)); 5631 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5632 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5633 /* operate on the matrix internal data to save memory */ 5634 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5635 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5636 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5637 /* Convert to global indices for diag matrix */ 5638 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5639 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5640 /* We want P_oth store global indices */ 5641 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5642 /* Use memory scalable approach */ 5643 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5644 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5645 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5646 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5647 /* Convert back to local indices */ 5648 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5649 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5650 nout = 0; 5651 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5652 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5653 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5654 /* Exchange values */ 5655 PetscCall(PetscSFBcastEnd(sf, 
MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5656 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5657 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5658 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5659 /* Stop PETSc from shrinking memory */ 5660 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5661 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5662 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5663 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5664 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5665 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5666 PetscCall(PetscSFDestroy(&sf)); 5667 PetscCall(PetscSFDestroy(&osf)); 5668 PetscFunctionReturn(PETSC_SUCCESS); 5669 } 5670 5671 /* 5672 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5673 * This supports MPIAIJ and MAIJ 5674 * */ 5675 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5676 { 5677 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5678 Mat_SeqAIJ *p_oth; 5679 IS rows, map; 5680 PetscHMapI hamp; 5681 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5682 MPI_Comm comm; 5683 PetscSF sf, osf; 5684 PetscBool has; 5685 5686 PetscFunctionBegin; 5687 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5688 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5689 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5690 * and then create a submatrix (that often is an overlapping matrix) 5691 * */ 5692 if (reuse == MAT_INITIAL_MATRIX) { 5693 /* Use a hash table to figure out unique keys */ 5694 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5695 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5696 count = 0; 5697 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5698 for (i = 0; i < a->B->cmap->n; i++) { 5699 key = a->garray[i] / dof; 5700 PetscCall(PetscHMapIHas(hamp, key, &has)); 5701 if (!has) { 5702 mapping[i] = count; 5703 PetscCall(PetscHMapISet(hamp, key, count++)); 5704 } else { 5705 /* Current 'i' has the same value the previous step */ 5706 mapping[i] = count - 1; 5707 } 5708 } 5709 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5710 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5711 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5712 PetscCall(PetscCalloc1(htsize, &rowindices)); 5713 off = 0; 5714 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5715 PetscCall(PetscHMapIDestroy(&hamp)); 5716 PetscCall(PetscSortInt(htsize, rowindices)); 5717 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5718 /* In case, the matrix was already created but users want to recreate the matrix */ 5719 PetscCall(MatDestroy(P_oth)); 5720 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5721 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5722 PetscCall(ISDestroy(&map)); 5723 PetscCall(ISDestroy(&rows)); 5724 } else if (reuse == MAT_REUSE_MATRIX) { 5725 /* If matrix was already created, we simply update values using SF objects 5726 * that as attached to the matrix earlier. 
5727 */ 5728 const PetscScalar *pd_a, *po_a; 5729 5730 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5731 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5732 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5733 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5734 /* Update values in place */ 5735 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5736 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5737 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5738 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5739 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5740 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5741 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5742 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5743 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5744 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5745 PetscFunctionReturn(PETSC_SUCCESS); 5746 } 5747 5748 /*@C 5749 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5750 5751 Collective 5752 5753 Input Parameters: 5754 + A - the first matrix in `MATMPIAIJ` format 5755 . B - the second matrix in `MATMPIAIJ` format 5756 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5757 5758 Output Parameters: 5759 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5760 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5761 - B_seq - the sequential matrix generated 5762 5763 Level: developer 5764 5765 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5766 @*/ 5767 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5768 { 5769 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5770 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5771 IS isrowb, iscolb; 5772 Mat *bseq = NULL; 5773 5774 PetscFunctionBegin; 5775 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5776 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5777 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5778 5779 if (scall == MAT_INITIAL_MATRIX) { 5780 start = A->cmap->rstart; 5781 cmap = a->garray; 5782 nzA = a->A->cmap->n; 5783 nzB = a->B->cmap->n; 5784 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5785 ncols = 0; 5786 for (i = 0; i < nzB; i++) { /* row < local row index */ 5787 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5788 else break; 5789 } 5790 imark = i; 5791 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5792 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5793 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5794 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5795 } else { 5796 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5797 isrowb = *rowb; 5798 iscolb = *colb; 5799 PetscCall(PetscMalloc1(1, &bseq)); 5800 bseq[0] = *B_seq; 5801 } 5802 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5803 *B_seq = bseq[0]; 5804 PetscCall(PetscFree(bseq)); 
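  /* Illustration of the row ordering built in the MAT_INITIAL_MATRIX branch above (hypothetical sizes,
     not taken from any particular run): with A->cmap->rstart = 4, nzA = 3 owned columns and
     a->garray = {1, 3, 8, 9}, the merged index set becomes idx = {1, 3, 4, 5, 6, 8, 9}, i.e. the rows
     of B needed by this rank, listed in ascending global order. */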
5805 if (!rowb) { 5806 PetscCall(ISDestroy(&isrowb)); 5807 } else { 5808 *rowb = isrowb; 5809 } 5810 if (!colb) { 5811 PetscCall(ISDestroy(&iscolb)); 5812 } else { 5813 *colb = iscolb; 5814 } 5815 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5816 PetscFunctionReturn(PETSC_SUCCESS); 5817 } 5818 5819 /* 5820 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5821 of the OFF-DIAGONAL portion of local A 5822 5823 Collective 5824 5825 Input Parameters: 5826 + A,B - the matrices in `MATMPIAIJ` format 5827 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5828 5829 Output Parameter: 5830 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5831 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5832 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5833 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5834 5835 Developer Note: 5836 This directly accesses information inside the VecScatter associated with the matrix-vector product 5837 for this matrix. This is not desirable.. 5838 5839 Level: developer 5840 5841 */ 5842 5843 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5844 { 5845 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5846 VecScatter ctx; 5847 MPI_Comm comm; 5848 const PetscMPIInt *rprocs, *sprocs; 5849 PetscMPIInt nrecvs, nsends; 5850 const PetscInt *srow, *rstarts, *sstarts; 5851 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5852 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5853 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5854 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5855 PetscMPIInt size, tag, rank, nreqs; 5856 5857 PetscFunctionBegin; 5858 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5859 PetscCallMPI(MPI_Comm_size(comm, &size)); 5860 5861 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5862 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5863 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5864 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5865 5866 if (size == 1) { 5867 startsj_s = NULL; 5868 bufa_ptr = NULL; 5869 *B_oth = NULL; 5870 PetscFunctionReturn(PETSC_SUCCESS); 5871 } 5872 5873 ctx = a->Mvctx; 5874 tag = ((PetscObject)ctx)->tag; 5875 5876 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5877 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5878 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5879 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5880 PetscCall(PetscMalloc1(nreqs, &reqs)); 5881 rwaits = reqs; 5882 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5883 5884 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5885 if (scall == MAT_INITIAL_MATRIX) { 5886 /* i-array */ 5887 /* post receives */ 5888 if (nrecvs) PetscCall(PetscMalloc1(rbs * 
(rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5889 for (i = 0; i < nrecvs; i++) { 5890 rowlen = rvalues + rstarts[i] * rbs; 5891 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5892 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5893 } 5894 5895 /* pack the outgoing message */ 5896 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5897 5898 sstartsj[0] = 0; 5899 rstartsj[0] = 0; 5900 len = 0; /* total length of j or a array to be sent */ 5901 if (nsends) { 5902 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5903 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5904 } 5905 for (i = 0; i < nsends; i++) { 5906 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5907 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5908 for (j = 0; j < nrows; j++) { 5909 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5910 for (l = 0; l < sbs; l++) { 5911 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5912 5913 rowlen[j * sbs + l] = ncols; 5914 5915 len += ncols; 5916 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5917 } 5918 k++; 5919 } 5920 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5921 5922 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5923 } 5924 /* recvs and sends of i-array are completed */ 5925 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5926 PetscCall(PetscFree(svalues)); 5927 5928 /* allocate buffers for sending j and a arrays */ 5929 PetscCall(PetscMalloc1(len + 1, &bufj)); 5930 PetscCall(PetscMalloc1(len + 1, &bufa)); 5931 5932 /* create i-array of B_oth */ 5933 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5934 5935 b_othi[0] = 0; 5936 len = 0; /* total length of j or a array to be received */ 5937 k = 0; 5938 for (i = 0; i < nrecvs; i++) { 5939 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5940 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5941 for (j = 0; j < nrows; j++) { 5942 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5943 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5944 k++; 5945 } 5946 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5947 } 5948 PetscCall(PetscFree(rvalues)); 5949 5950 /* allocate space for j and a arrays of B_oth */ 5951 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5952 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5953 5954 /* j-array */ 5955 /* post receives of j-array */ 5956 for (i = 0; i < nrecvs; i++) { 5957 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5958 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5959 } 5960 5961 /* pack the outgoing message j-array */ 5962 if (nsends) k = sstarts[0]; 5963 for (i = 0; i < nsends; i++) { 5964 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5965 bufJ = bufj + sstartsj[i]; 5966 for (j = 0; j < nrows; j++) { 5967 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5968 for (ll = 0; ll < sbs; ll++) { 5969 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5970 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5971 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5972 } 5973 } 5974 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - 
sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5975 } 5976 5977 /* recvs and sends of j-array are completed */ 5978 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5979 } else if (scall == MAT_REUSE_MATRIX) { 5980 sstartsj = *startsj_s; 5981 rstartsj = *startsj_r; 5982 bufa = *bufa_ptr; 5983 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5984 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5985 5986 /* a-array */ 5987 /* post receives of a-array */ 5988 for (i = 0; i < nrecvs; i++) { 5989 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5990 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5991 } 5992 5993 /* pack the outgoing message a-array */ 5994 if (nsends) k = sstarts[0]; 5995 for (i = 0; i < nsends; i++) { 5996 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5997 bufA = bufa + sstartsj[i]; 5998 for (j = 0; j < nrows; j++) { 5999 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6000 for (ll = 0; ll < sbs; ll++) { 6001 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6002 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6003 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6004 } 6005 } 6006 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6007 } 6008 /* recvs and sends of a-array are completed */ 6009 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6010 PetscCall(PetscFree(reqs)); 6011 6012 if (scall == MAT_INITIAL_MATRIX) { 6013 Mat_SeqAIJ *b_oth; 6014 6015 /* put together the new matrix */ 6016 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6017 6018 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6019 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6020 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6021 b_oth->free_a = PETSC_TRUE; 6022 b_oth->free_ij = PETSC_TRUE; 6023 b_oth->nonew = 0; 6024 6025 PetscCall(PetscFree(bufj)); 6026 if (!startsj_s || !bufa_ptr) { 6027 PetscCall(PetscFree2(sstartsj, rstartsj)); 6028 PetscCall(PetscFree(bufa_ptr)); 6029 } else { 6030 *startsj_s = sstartsj; 6031 *startsj_r = rstartsj; 6032 *bufa_ptr = bufa; 6033 } 6034 } else if (scall == MAT_REUSE_MATRIX) { 6035 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6036 } 6037 6038 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6039 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6040 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6041 PetscFunctionReturn(PETSC_SUCCESS); 6042 } 6043 6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6047 #if defined(PETSC_HAVE_MKL_SPARSE) 6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6049 #endif 6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6052 #if defined(PETSC_HAVE_ELEMENTAL) 6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6054 #endif 6055 #if defined(PETSC_HAVE_SCALAPACK) 6056 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6057 #endif 6058 #if defined(PETSC_HAVE_HYPRE) 6059 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6060 #endif 6061 #if defined(PETSC_HAVE_CUDA) 6062 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6063 #endif 6064 #if defined(PETSC_HAVE_HIP) 6065 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6066 #endif 6067 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6068 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6069 #endif 6070 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6071 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6072 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6073 6074 /* 6075 Computes (B'*A')' since computing B*A directly is untenable 6076 6077 n p p 6078 [ ] [ ] [ ] 6079 m [ A ] * n [ B ] = m [ C ] 6080 [ ] [ ] [ ] 6081 6082 */ 6083 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6084 { 6085 Mat At, Bt, Ct; 6086 6087 PetscFunctionBegin; 6088 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6089 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6090 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6091 PetscCall(MatDestroy(&At)); 6092 PetscCall(MatDestroy(&Bt)); 6093 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6094 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6095 PetscCall(MatDestroy(&Ct)); 6096 PetscFunctionReturn(PETSC_SUCCESS); 6097 } 6098 6099 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6100 { 6101 PetscBool cisdense; 6102 6103 PetscFunctionBegin; 6104 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6105 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6106 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6107 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6108 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6109 PetscCall(MatSetUp(C)); 6110 6111 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6112 PetscFunctionReturn(PETSC_SUCCESS); 6113 } 6114 6115 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6116 { 6117 Mat_Product *product = C->product; 6118 Mat A = product->A, B = product->B; 6119 6120 PetscFunctionBegin; 6121 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6122 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6123 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6124 C->ops->productsymbolic = MatProductSymbolic_AB; 6125 PetscFunctionReturn(PETSC_SUCCESS); 6126 } 6127 6128 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6129 { 6130 Mat_Product *product = C->product; 6131 6132 PetscFunctionBegin; 6133 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6134 PetscFunctionReturn(PETSC_SUCCESS); 6135 } 6136 6137 /* 6138 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6139 6140 Input Parameters: 6141 6142 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6143 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6144 6145 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6146 6147 For Set1, j1[] contains column indices of the nonzeros. 6148 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6149 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6150 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6151 6152 Similar for Set2. 6153 6154 This routine merges the two sets of nonzeros row by row and removes repeats. 6155 6156 Output Parameters: (memory is allocated by the caller) 6157 6158 i[],j[]: the CSR of the merged matrix, which has m rows. 6159 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6160 imap2[]: similar to imap1[], but for Set2. 6161 Note we order nonzeros row-by-row and from left to right. 
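
  To make the index maps concrete, a small example with made-up data:
    m = 1
    Set1: j1 = {1,1,3}, rowBegin1 = {0}, rowEnd1 = {3}, jmap1 = {0,2,3}   (unique columns 1 and 3)
    Set2: j2 = {2,3,3}, rowBegin2 = {0}, rowEnd2 = {3}, jmap2 = {0,1,3}   (unique columns 2 and 3)
  The merged CSR is i = {0,3}, j = {1,2,3}, with imap1 = {0,2} and imap2 = {1,2}.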
6162 */ 6163 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6164 { 6165 PetscInt r, m; /* Row index of mat */ 6166 PetscCount t, t1, t2, b1, e1, b2, e2; 6167 6168 PetscFunctionBegin; 6169 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6170 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6171 i[0] = 0; 6172 for (r = 0; r < m; r++) { /* Do row by row merging */ 6173 b1 = rowBegin1[r]; 6174 e1 = rowEnd1[r]; 6175 b2 = rowBegin2[r]; 6176 e2 = rowEnd2[r]; 6177 while (b1 < e1 && b2 < e2) { 6178 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6179 j[t] = j1[b1]; 6180 imap1[t1] = t; 6181 imap2[t2] = t; 6182 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6183 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6184 t1++; 6185 t2++; 6186 t++; 6187 } else if (j1[b1] < j2[b2]) { 6188 j[t] = j1[b1]; 6189 imap1[t1] = t; 6190 b1 += jmap1[t1 + 1] - jmap1[t1]; 6191 t1++; 6192 t++; 6193 } else { 6194 j[t] = j2[b2]; 6195 imap2[t2] = t; 6196 b2 += jmap2[t2 + 1] - jmap2[t2]; 6197 t2++; 6198 t++; 6199 } 6200 } 6201 /* Merge the remaining in either j1[] or j2[] */ 6202 while (b1 < e1) { 6203 j[t] = j1[b1]; 6204 imap1[t1] = t; 6205 b1 += jmap1[t1 + 1] - jmap1[t1]; 6206 t1++; 6207 t++; 6208 } 6209 while (b2 < e2) { 6210 j[t] = j2[b2]; 6211 imap2[t2] = t; 6212 b2 += jmap2[t2 + 1] - jmap2[t2]; 6213 t2++; 6214 t++; 6215 } 6216 PetscCall(PetscIntCast(t, i + r + 1)); 6217 } 6218 PetscFunctionReturn(PETSC_SUCCESS); 6219 } 6220 6221 /* 6222 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6223 6224 Input Parameters: 6225 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6226 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6227 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6228 6229 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6230 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6231 6232 Output Parameters: 6233 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6234 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6235 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6236 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6237 6238 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6239 Atot: number of entries belonging to the diagonal block. 6240 Annz: number of unique nonzeros belonging to the diagonal block. 6241 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6242 repeats (i.e., same 'i,j' pair). 6243 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6244 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6245 6246 Atot: number of entries belonging to the diagonal block 6247 Annz: number of unique nonzeros belonging to the diagonal block. 6248 6249 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6250 6251 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6252 */ 6253 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6254 { 6255 PetscInt cstart, cend, rstart, rend, row, col; 6256 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6257 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6258 PetscCount k, m, p, q, r, s, mid; 6259 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6260 6261 PetscFunctionBegin; 6262 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6263 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6264 m = rend - rstart; 6265 6266 /* Skip negative rows */ 6267 for (k = 0; k < n; k++) 6268 if (i[k] >= 0) break; 6269 6270 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6271 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6272 */ 6273 while (k < n) { 6274 row = i[k]; 6275 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6276 for (s = k; s < n; s++) 6277 if (i[s] != row) break; 6278 6279 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6280 for (p = k; p < s; p++) { 6281 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6282 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6283 } 6284 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6285 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6286 rowBegin[row - rstart] = k; 6287 rowMid[row - rstart] = mid; 6288 rowEnd[row - rstart] = s; 6289 6290 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6291 Atot += mid - k; 6292 Btot += s - mid; 6293 6294 /* Count unique nonzeros of this diag row */ 6295 for (p = k; p < mid;) { 6296 col = j[p]; 6297 do { 6298 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6299 p++; 6300 } while (p < mid && j[p] == col); 6301 Annz++; 6302 } 6303 6304 /* Count unique nonzeros of this offdiag row */ 6305 for (p = mid; p < s;) { 6306 col = j[p]; 6307 do { 6308 p++; 6309 } while (p < s && j[p] == col); 6310 Bnnz++; 6311 } 6312 k = s; 6313 } 6314 6315 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6316 PetscCall(PetscMalloc1(Atot, &Aperm)); 6317 PetscCall(PetscMalloc1(Btot, &Bperm)); 6318 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6319 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6320 6321 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6322 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6323 for (r = 0; r < m; r++) { 6324 k = rowBegin[r]; 6325 mid 
= rowMid[r]; 6326 s = rowEnd[r]; 6327 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6328 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6329 Atot += mid - k; 6330 Btot += s - mid; 6331 6332 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6333 for (p = k; p < mid;) { 6334 col = j[p]; 6335 q = p; 6336 do { 6337 p++; 6338 } while (p < mid && j[p] == col); 6339 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6340 Annz++; 6341 } 6342 6343 for (p = mid; p < s;) { 6344 col = j[p]; 6345 q = p; 6346 do { 6347 p++; 6348 } while (p < s && j[p] == col); 6349 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6350 Bnnz++; 6351 } 6352 } 6353 /* Output */ 6354 *Aperm_ = Aperm; 6355 *Annz_ = Annz; 6356 *Atot_ = Atot; 6357 *Ajmap_ = Ajmap; 6358 *Bperm_ = Bperm; 6359 *Bnnz_ = Bnnz; 6360 *Btot_ = Btot; 6361 *Bjmap_ = Bjmap; 6362 PetscFunctionReturn(PETSC_SUCCESS); 6363 } 6364 6365 /* 6366 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6367 6368 Input Parameters: 6369 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6370 nnz: number of unique nonzeros in the merged matrix 6371 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6372 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6373 6374 Output Parameter: (memory is allocated by the caller) 6375 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6376 6377 Example: 6378 nnz1 = 4 6379 nnz = 6 6380 imap = [1,3,4,5] 6381 jmap = [0,3,5,6,7] 6382 then, 6383 jmap_new = [0,0,3,3,5,6,7] 6384 */ 6385 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6386 { 6387 PetscCount k, p; 6388 6389 PetscFunctionBegin; 6390 jmap_new[0] = 0; 6391 p = nnz; /* p loops over jmap_new[] backwards */ 6392 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6393 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6394 } 6395 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6396 PetscFunctionReturn(PETSC_SUCCESS); 6397 } 6398 6399 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6400 { 6401 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6402 6403 PetscFunctionBegin; 6404 PetscCall(PetscSFDestroy(&coo->sf)); 6405 PetscCall(PetscFree(coo->Aperm1)); 6406 PetscCall(PetscFree(coo->Bperm1)); 6407 PetscCall(PetscFree(coo->Ajmap1)); 6408 PetscCall(PetscFree(coo->Bjmap1)); 6409 PetscCall(PetscFree(coo->Aimap2)); 6410 PetscCall(PetscFree(coo->Bimap2)); 6411 PetscCall(PetscFree(coo->Aperm2)); 6412 PetscCall(PetscFree(coo->Bperm2)); 6413 PetscCall(PetscFree(coo->Ajmap2)); 6414 PetscCall(PetscFree(coo->Bjmap2)); 6415 PetscCall(PetscFree(coo->Cperm1)); 6416 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6417 PetscCall(PetscFree(coo)); 6418 PetscFunctionReturn(PETSC_SUCCESS); 6419 } 6420 6421 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6422 { 6423 MPI_Comm comm; 6424 PetscMPIInt rank, size; 6425 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6426 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6427 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6428 PetscContainer container; 6429 MatCOOStruct_MPIAIJ 
*coo; 6430 6431 PetscFunctionBegin; 6432 PetscCall(PetscFree(mpiaij->garray)); 6433 PetscCall(VecDestroy(&mpiaij->lvec)); 6434 #if defined(PETSC_USE_CTABLE) 6435 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6436 #else 6437 PetscCall(PetscFree(mpiaij->colmap)); 6438 #endif 6439 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6440 mat->assembled = PETSC_FALSE; 6441 mat->was_assembled = PETSC_FALSE; 6442 6443 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6444 PetscCallMPI(MPI_Comm_size(comm, &size)); 6445 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6446 PetscCall(PetscLayoutSetUp(mat->rmap)); 6447 PetscCall(PetscLayoutSetUp(mat->cmap)); 6448 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6449 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6450 PetscCall(MatGetLocalSize(mat, &m, &n)); 6451 PetscCall(MatGetSize(mat, &M, &N)); 6452 6453 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6454 /* entries come first, then local rows, then remote rows. */ 6455 PetscCount n1 = coo_n, *perm1; 6456 PetscInt *i1 = coo_i, *j1 = coo_j; 6457 6458 PetscCall(PetscMalloc1(n1, &perm1)); 6459 for (k = 0; k < n1; k++) perm1[k] = k; 6460 6461 /* Manipulate indices so that entries with negative row or col indices will have smallest 6462 row indices, local entries will have greater but negative row indices, and remote entries 6463 will have positive row indices. 6464 */ 6465 for (k = 0; k < n1; k++) { 6466 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6467 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6468 else { 6469 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6470 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6471 } 6472 } 6473 6474 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6475 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6476 6477 /* Advance k to the first entry we need to take care of */ 6478 for (k = 0; k < n1; k++) 6479 if (i1[k] > PETSC_INT_MIN) break; 6480 PetscCount i1start = k; 6481 6482 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6483 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6484 6485 /* Send remote rows to their owner */ 6486 /* Find which rows should be sent to which remote ranks*/ 6487 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6488 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6489 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6490 const PetscInt *ranges; 6491 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */

  PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
  PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
  for (k = rem; k < n1;) {
    PetscMPIInt owner;
    PetscInt    firstRow, lastRow;

    /* Locate a row range */
    firstRow = i1[k]; /* first row of this owner */
    PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
    lastRow = ranges[owner + 1] - 1; /* last row of this owner */

    /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
    PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));

    /* All entries in [k,p) belong to this remote owner */
    if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
      PetscMPIInt *sendto2;
      PetscInt    *nentries2;
      PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;

      PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
      PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
      PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
      PetscCall(PetscFree2(sendto, nentries));
      sendto   = sendto2;
      nentries = nentries2;
      maxNsend = maxNsend2;
    }
    sendto[nsend] = owner;
    PetscCall(PetscIntCast(p - k, &nentries[nsend]));
    nsend++;
    k = p;
  }

  /* Build 1st SF to know offsets on remote to send data */
  PetscSF      sf1;
  PetscInt     nroots = 1, nroots2 = 0;
  PetscInt     nleaves = nsend, nleaves2 = 0;
  PetscInt    *offsets;
  PetscSFNode *iremote;

  PetscCall(PetscSFCreate(comm, &sf1));
  PetscCall(PetscMalloc1(nsend, &iremote));
  PetscCall(PetscMalloc1(nsend, &offsets));
  for (k = 0; k < nsend; k++) {
    iremote[k].rank  = sendto[k];
    iremote[k].index = 0;
    nleaves2 += nentries[k];
    PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
  }
  PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, the offsets[] check below catches it */
  PetscCall(PetscSFDestroy(&sf1));
  PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);

  /* Build 2nd SF to send remote COOs to their owner */
  PetscSF sf2;
  nroots  = nroots2;
  nleaves = nleaves2;
  PetscCall(PetscSFCreate(comm, &sf2));
  PetscCall(PetscSFSetFromOptions(sf2));
  PetscCall(PetscMalloc1(nleaves, &iremote));
  p = 0;
  for (k = 0; k < nsend; k++) {
    PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
    for (q = 0; q < nentries[k]; q++, p++) {
      iremote[p].rank = sendto[k];
      PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
    }
  }
  PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));

  /* Send the remote COOs to their owner */
  PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
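  /* A small illustration of the two-SF handshake above (made-up counts, not from any particular run):
     suppose this rank must send COO entries to two owners, sendto = {1, 3} with nentries = {3, 5}, and
     rank 1 happens to count 4 entries from some other sender first. Then the PetscSFFetchAndOp on sf1
     returns offsets = {4, 0} on this rank, and each owner's nroots2 ends up being the total number of
     entries it will receive. sf2 maps the q-th entry destined for sendto[k] to root (sendto[k],
     offsets[k] + q), so the PetscSFReduce calls below land i1/j1 (and later the values) in contiguous,
     per-sender slots of the owners' receive buffers. */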
PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6569 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6570 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6571 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6572 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6573 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6574 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6575 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6576 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6577 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6578 6579 PetscCall(PetscFree(offsets)); 6580 PetscCall(PetscFree2(sendto, nentries)); 6581 6582 /* Sort received COOs by row along with the permutation array */ 6583 for (k = 0; k < n2; k++) perm2[k] = k; 6584 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6585 6586 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6587 PetscCount *Cperm1; 6588 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6589 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6590 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6591 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6592 6593 /* Support for HYPRE matrices, kind of a hack. 6594 Swap min column with diagonal so that diagonal values will go first */ 6595 PetscBool hypre; 6596 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6597 if (hypre) { 6598 PetscInt *minj; 6599 PetscBT hasdiag; 6600 6601 PetscCall(PetscBTCreate(m, &hasdiag)); 6602 PetscCall(PetscMalloc1(m, &minj)); 6603 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6604 for (k = i1start; k < rem; k++) { 6605 if (j1[k] < cstart || j1[k] >= cend) continue; 6606 const PetscInt rindex = i1[k] - rstart; 6607 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6608 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6609 } 6610 for (k = 0; k < n2; k++) { 6611 if (j2[k] < cstart || j2[k] >= cend) continue; 6612 const PetscInt rindex = i2[k] - rstart; 6613 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6614 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6615 } 6616 for (k = i1start; k < rem; k++) { 6617 const PetscInt rindex = i1[k] - rstart; 6618 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6619 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6620 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6621 } 6622 for (k = 0; k < n2; k++) { 6623 const PetscInt rindex = i2[k] - rstart; 6624 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6625 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6626 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6627 } 6628 PetscCall(PetscBTDestroy(&hasdiag)); 6629 PetscCall(PetscFree(minj)); 6630 } 6631 6632 /* Split local COOs and received COOs into diag/offdiag portions */ 6633 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6634 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6635 PetscCount 
Annz1, Bnnz1, Atot1, Btot1; 6636 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6637 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6638 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6639 6640 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6641 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6642 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6643 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6644 6645 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6646 PetscInt *Ai, *Bi; 6647 PetscInt *Aj, *Bj; 6648 6649 PetscCall(PetscMalloc1(m + 1, &Ai)); 6650 PetscCall(PetscMalloc1(m + 1, &Bi)); 6651 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6652 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6653 6654 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6655 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6656 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6657 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6658 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6659 6660 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6661 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6662 6663 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6664 /* expect nonzeros in A/B most likely have local contributing entries */ 6665 PetscInt Annz = Ai[m]; 6666 PetscInt Bnnz = Bi[m]; 6667 PetscCount *Ajmap1_new, *Bjmap1_new; 6668 6669 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6670 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6671 6672 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6673 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6674 6675 PetscCall(PetscFree(Aimap1)); 6676 PetscCall(PetscFree(Ajmap1)); 6677 PetscCall(PetscFree(Bimap1)); 6678 PetscCall(PetscFree(Bjmap1)); 6679 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6680 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6681 PetscCall(PetscFree(perm1)); 6682 PetscCall(PetscFree3(i2, j2, perm2)); 6683 6684 Ajmap1 = Ajmap1_new; 6685 Bjmap1 = Bjmap1_new; 6686 6687 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6688 if (Annz < Annz1 + Annz2) { 6689 PetscInt *Aj_new; 6690 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6691 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6692 PetscCall(PetscFree(Aj)); 6693 Aj = Aj_new; 6694 } 6695 6696 if (Bnnz < Bnnz1 + Bnnz2) { 6697 PetscInt *Bj_new; 6698 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6699 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6700 PetscCall(PetscFree(Bj)); 6701 Bj = Bj_new; 6702 } 6703 6704 /* Create new submatrices for on-process and off-process coupling */ 6705 PetscScalar *Aa, *Ba; 6706 MatType rtype; 6707 Mat_SeqAIJ *a, *b; 6708 PetscObjectState state; 6709 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6710 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6711 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6712 if (cstart) { 6713 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6714 } 6715 6716 PetscCall(MatGetRootType_Private(mat, &rtype)); 6717 6718 
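  /* How the maps assembled above are consumed later (illustrative, made-up numbers): if the caller's COO
     input repeats the same local (row, col) three times, at positions 2, 5 and 9 of the value array v[]
     passed to MatSetValuesCOO(), and that entry becomes the t-th nonzero of the diagonal block, then
     Ajmap1[t+1] - Ajmap1[t] = 3 and Aperm1[Ajmap1[t]], ..., Aperm1[Ajmap1[t+1]-1] = {2, 5, 9} (in some
     order), so MatSetValuesCOO_MPIAIJ() computes Aa[t] = (imode == INSERT_VALUES ? 0.0 : Aa[t]) + v[2] + v[5] + v[9].
     The Bjmap1/Bperm1 and the *imap2/*jmap2/*perm2 arrays play the same role for the off-diagonal block
     and for entries received from other ranks. */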
MatSeqXAIJGetOptions_Private(mpiaij->A); 6719 PetscCall(MatDestroy(&mpiaij->A)); 6720 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6721 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6722 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6723 6724 MatSeqXAIJGetOptions_Private(mpiaij->B); 6725 PetscCall(MatDestroy(&mpiaij->B)); 6726 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6727 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6728 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6729 6730 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6731 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6732 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6733 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6734 6735 a = (Mat_SeqAIJ *)mpiaij->A->data; 6736 b = (Mat_SeqAIJ *)mpiaij->B->data; 6737 a->free_a = PETSC_TRUE; 6738 a->free_ij = PETSC_TRUE; 6739 b->free_a = PETSC_TRUE; 6740 b->free_ij = PETSC_TRUE; 6741 a->maxnz = a->nz; 6742 b->maxnz = b->nz; 6743 6744 /* conversion must happen AFTER multiply setup */ 6745 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6746 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6747 PetscCall(VecDestroy(&mpiaij->lvec)); 6748 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6749 6750 // Put the COO struct in a container and then attach that to the matrix 6751 PetscCall(PetscMalloc1(1, &coo)); 6752 coo->n = coo_n; 6753 coo->sf = sf2; 6754 coo->sendlen = nleaves; 6755 coo->recvlen = nroots; 6756 coo->Annz = Annz; 6757 coo->Bnnz = Bnnz; 6758 coo->Annz2 = Annz2; 6759 coo->Bnnz2 = Bnnz2; 6760 coo->Atot1 = Atot1; 6761 coo->Atot2 = Atot2; 6762 coo->Btot1 = Btot1; 6763 coo->Btot2 = Btot2; 6764 coo->Ajmap1 = Ajmap1; 6765 coo->Aperm1 = Aperm1; 6766 coo->Bjmap1 = Bjmap1; 6767 coo->Bperm1 = Bperm1; 6768 coo->Aimap2 = Aimap2; 6769 coo->Ajmap2 = Ajmap2; 6770 coo->Aperm2 = Aperm2; 6771 coo->Bimap2 = Bimap2; 6772 coo->Bjmap2 = Bjmap2; 6773 coo->Bperm2 = Bperm2; 6774 coo->Cperm1 = Cperm1; 6775 // Allocate in preallocation. 
If not used, it has zero cost on host 6776 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6777 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6778 PetscCall(PetscContainerSetPointer(container, coo)); 6779 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6780 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6781 PetscCall(PetscContainerDestroy(&container)); 6782 PetscFunctionReturn(PETSC_SUCCESS); 6783 } 6784 6785 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6786 { 6787 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6788 Mat A = mpiaij->A, B = mpiaij->B; 6789 PetscScalar *Aa, *Ba; 6790 PetscScalar *sendbuf, *recvbuf; 6791 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6792 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6793 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6794 const PetscCount *Cperm1; 6795 PetscContainer container; 6796 MatCOOStruct_MPIAIJ *coo; 6797 6798 PetscFunctionBegin; 6799 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6800 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6801 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6802 sendbuf = coo->sendbuf; 6803 recvbuf = coo->recvbuf; 6804 Ajmap1 = coo->Ajmap1; 6805 Ajmap2 = coo->Ajmap2; 6806 Aimap2 = coo->Aimap2; 6807 Bjmap1 = coo->Bjmap1; 6808 Bjmap2 = coo->Bjmap2; 6809 Bimap2 = coo->Bimap2; 6810 Aperm1 = coo->Aperm1; 6811 Aperm2 = coo->Aperm2; 6812 Bperm1 = coo->Bperm1; 6813 Bperm2 = coo->Bperm2; 6814 Cperm1 = coo->Cperm1; 6815 6816 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6817 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6818 6819 /* Pack entries to be sent to remote */ 6820 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6821 6822 /* Send remote entries to their owner and overlap the communication with local computation */ 6823 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6824 /* Add local entries to A and B */ 6825 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6826 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6827 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6828 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6829 } 6830 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6831 PetscScalar sum = 0.0; 6832 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6833 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6834 } 6835 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6836 6837 /* Add received remote entries to A and B */ 6838 for (PetscCount i = 0; i < coo->Annz2; i++) { 6839 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6840 } 6841 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6842 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6843 } 6844 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6845 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6846 PetscFunctionReturn(PETSC_SUCCESS); 6847 } 6848 6849 /*MC 6850 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6851 6852 Options Database Keys: 6853 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6854 6855 Level: beginner 6856 6857 Notes: 6858 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6859 in this case the values associated with the rows and columns one passes in are set to zero 6860 in the matrix 6861 6862 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6863 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6864 6865 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6866 M*/ 6867 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6868 { 6869 Mat_MPIAIJ *b; 6870 PetscMPIInt size; 6871 6872 PetscFunctionBegin; 6873 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6874 6875 PetscCall(PetscNew(&b)); 6876 B->data = (void *)b; 6877 B->ops[0] = MatOps_Values; 6878 B->assembled = PETSC_FALSE; 6879 B->insertmode = NOT_SET_VALUES; 6880 b->size = size; 6881 6882 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6883 6884 /* build cache for off array entries formed */ 6885 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6886 6887 b->donotstash = PETSC_FALSE; 6888 b->colmap = NULL; 6889 b->garray = NULL; 6890 b->roworiented = PETSC_TRUE; 6891 6892 /* stuff used for matrix vector multiply */ 6893 b->lvec = NULL; 6894 b->Mvctx = NULL; 6895 6896 /* stuff for MatGetRow() */ 6897 b->rowindices = NULL; 6898 b->rowvalues = NULL; 6899 b->getrowactive = PETSC_FALSE; 6900 6901 /* flexible pointer used in CUSPARSE classes */ 6902 b->spptr = NULL; 6903 6904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", 
MatConvert_MPIAIJ_MPIAIJPERM)); 6913 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6914 #if defined(PETSC_HAVE_CUDA) 6915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6916 #endif 6917 #if defined(PETSC_HAVE_HIP) 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6919 #endif 6920 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6922 #endif 6923 #if defined(PETSC_HAVE_MKL_SPARSE) 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6925 #endif 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6930 #if defined(PETSC_HAVE_ELEMENTAL) 6931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6932 #endif 6933 #if defined(PETSC_HAVE_SCALAPACK) 6934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6935 #endif 6936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6938 #if defined(PETSC_HAVE_HYPRE) 6939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6941 #endif 6942 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6943 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6944 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6945 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6946 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6947 PetscFunctionReturn(PETSC_SUCCESS); 6948 } 6949 6950 /*@ 6951 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6952 and "off-diagonal" part of the matrix in CSR format. 6953 6954 Collective 6955 6956 Input Parameters: 6957 + comm - MPI communicator 6958 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6959 . n - This value should be the same as the local size used in creating the 6960 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6961 calculated if `N` is given) For square matrices `n` is almost always `m`. 6962 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6963 . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6964 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6965 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6966 . a - matrix values 6967 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6968 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6969 - oa - matrix values 6970 6971 Output Parameter: 6972 . mat - the matrix 6973 6974 Level: advanced 6975 6976 Notes: 6977 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6978 must free the arrays once the matrix has been destroyed and not before. 6979 6980 The `i` and `j` indices are 0 based 6981 6982 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6983 6984 This sets local rows and cannot be used to set off-processor values. 6985 6986 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6987 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6988 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6989 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6990 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6991 communication if it is known that only local entries will be set. 
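
   Example:
   For illustration only (a made-up layout, not taken from a PETSc example), consider a 4x4 matrix distributed
   over two ranks, each owning two rows and two columns. If rank 0 holds the entries A(0,0)=1, A(0,1)=2,
   A(0,3)=3, A(1,1)=4 and A(1,2)=5, it would pass
     i  = {0, 2, 3}   j  = {0, 1, 1}   a  = {1.0, 2.0, 4.0}   (diagonal block, local column indices)
     oi = {0, 1, 2}   oj = {3, 2}      oa = {3.0, 5.0}        (off-diagonal block, global column indices)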
6992 6993 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6994 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6995 @*/ 6996 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6997 { 6998 Mat_MPIAIJ *maij; 6999 7000 PetscFunctionBegin; 7001 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 7002 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 7003 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 7004 PetscCall(MatCreate(comm, mat)); 7005 PetscCall(MatSetSizes(*mat, m, n, M, N)); 7006 PetscCall(MatSetType(*mat, MATMPIAIJ)); 7007 maij = (Mat_MPIAIJ *)(*mat)->data; 7008 7009 (*mat)->preallocated = PETSC_TRUE; 7010 7011 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 7012 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 7013 7014 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 7015 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7016 7017 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7018 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7019 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7020 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7021 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7022 PetscFunctionReturn(PETSC_SUCCESS); 7023 } 7024 7025 typedef struct { 7026 Mat *mp; /* intermediate products */ 7027 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7028 PetscInt cp; /* number of intermediate products */ 7029 7030 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7031 PetscInt *startsj_s, *startsj_r; 7032 PetscScalar *bufa; 7033 Mat P_oth; 7034 7035 /* may take advantage of merging product->B */ 7036 Mat Bloc; /* B-local by merging diag and off-diag */ 7037 7038 /* cusparse does not have support to split between symbolic and numeric phases. 7039 When api_user is true, we don't need to update the numerical values 7040 of the temporary storage */ 7041 PetscBool reusesym; 7042 7043 /* support for COO values insertion */ 7044 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7045 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7046 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7047 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7048 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7049 PetscMemType mtype; 7050 7051 /* customization */ 7052 PetscBool abmerge; 7053 PetscBool P_oth_bind; 7054 } MatMatMPIAIJBACKEND; 7055 7056 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7057 { 7058 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7059 PetscInt i; 7060 7061 PetscFunctionBegin; 7062 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7063 PetscCall(PetscFree(mmdata->bufa)); 7064 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7065 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7066 PetscCall(MatDestroy(&mmdata->P_oth)); 7067 PetscCall(MatDestroy(&mmdata->Bloc)); 7068 PetscCall(PetscSFDestroy(&mmdata->sf)); 7069 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7070 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7071 PetscCall(PetscFree(mmdata->own[0])); 7072 PetscCall(PetscFree(mmdata->own)); 7073 PetscCall(PetscFree(mmdata->off[0])); 7074 PetscCall(PetscFree(mmdata->off)); 7075 PetscCall(PetscFree(mmdata)); 7076 PetscFunctionReturn(PETSC_SUCCESS); 7077 } 7078 7079 /* Copy selected n entries with indices in idx[] of A to v[]. 7080 If idx is NULL, copy the whole data array of A to v[] 7081 */ 7082 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7083 { 7084 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7085 7086 PetscFunctionBegin; 7087 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7088 if (f) { 7089 PetscCall((*f)(A, n, idx, v)); 7090 } else { 7091 const PetscScalar *vv; 7092 7093 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7094 if (n && idx) { 7095 PetscScalar *w = v; 7096 const PetscInt *oi = idx; 7097 PetscInt j; 7098 7099 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7100 } else { 7101 PetscCall(PetscArraycpy(v, vv, n)); 7102 } 7103 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7104 } 7105 PetscFunctionReturn(PETSC_SUCCESS); 7106 } 7107 7108 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7109 { 7110 MatMatMPIAIJBACKEND *mmdata; 7111 PetscInt i, n_d, n_o; 7112 7113 PetscFunctionBegin; 7114 MatCheckProduct(C, 1); 7115 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7116 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7117 if (!mmdata->reusesym) { /* update temporary matrices */ 7118 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7119 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7120 } 7121 mmdata->reusesym = PETSC_FALSE; 7122 7123 for (i = 0; i < mmdata->cp; i++) { 7124 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7125 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7126 } 7127 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7128 PetscInt noff; 7129 7130 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7131 if (mmdata->mptmp[i]) continue; 7132 if (noff) { 7133 PetscInt nown; 7134 7135 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7136 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], 
mmdata->coo_w + n_o)); 7137 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7138 n_o += noff; 7139 n_d += nown; 7140 } else { 7141 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7142 7143 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7144 n_d += mm->nz; 7145 } 7146 } 7147 if (mmdata->hasoffproc) { /* offprocess insertion */ 7148 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7149 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7150 } 7151 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7152 PetscFunctionReturn(PETSC_SUCCESS); 7153 } 7154 7155 /* Support for Pt * A, A * P, or Pt * A * P */ 7156 #define MAX_NUMBER_INTERMEDIATE 4 7157 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7158 { 7159 Mat_Product *product = C->product; 7160 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7161 Mat_MPIAIJ *a, *p; 7162 MatMatMPIAIJBACKEND *mmdata; 7163 ISLocalToGlobalMapping P_oth_l2g = NULL; 7164 IS glob = NULL; 7165 const char *prefix; 7166 char pprefix[256]; 7167 const PetscInt *globidx, *P_oth_idx; 7168 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7169 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7170 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7171 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7172 /* a base offset; type-2: sparse with a local to global map table */ 7173 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7174 7175 MatProductType ptype; 7176 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7177 PetscMPIInt size; 7178 7179 PetscFunctionBegin; 7180 MatCheckProduct(C, 1); 7181 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7182 ptype = product->type; 7183 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7184 ptype = MATPRODUCT_AB; 7185 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7186 } 7187 switch (ptype) { 7188 case MATPRODUCT_AB: 7189 A = product->A; 7190 P = product->B; 7191 m = A->rmap->n; 7192 n = P->cmap->n; 7193 M = A->rmap->N; 7194 N = P->cmap->N; 7195 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7196 break; 7197 case MATPRODUCT_AtB: 7198 P = product->A; 7199 A = product->B; 7200 m = P->cmap->n; 7201 n = A->cmap->n; 7202 M = P->cmap->N; 7203 N = A->cmap->N; 7204 hasoffproc = PETSC_TRUE; 7205 break; 7206 case MATPRODUCT_PtAP: 7207 A = product->A; 7208 P = product->B; 7209 m = P->cmap->n; 7210 n = P->cmap->n; 7211 M = P->cmap->N; 7212 N = P->cmap->N; 7213 hasoffproc = PETSC_TRUE; 7214 break; 7215 default: 7216 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7217 } 7218 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7219 if (size == 1) hasoffproc = PETSC_FALSE; 7220 7221 /* defaults */ 7222 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7223 mp[i] = NULL; 7224 mptmp[i] = PETSC_FALSE; 7225 rmapt[i] = -1; 7226 cmapt[i] = -1; 7227 rmapa[i] = NULL; 7228 cmapa[i] = NULL; 7229 } 7230 7231 /* customization */ 7232 PetscCall(PetscNew(&mmdata)); 7233 mmdata->reusesym = product->api_user; 7234 if (ptype == 
MATPRODUCT_AB) { 7235 if (product->api_user) { 7236 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7237 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7238 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7239 PetscOptionsEnd(); 7240 } else { 7241 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7242 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7243 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7244 PetscOptionsEnd(); 7245 } 7246 } else if (ptype == MATPRODUCT_PtAP) { 7247 if (product->api_user) { 7248 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7249 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7250 PetscOptionsEnd(); 7251 } else { 7252 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7253 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7254 PetscOptionsEnd(); 7255 } 7256 } 7257 a = (Mat_MPIAIJ *)A->data; 7258 p = (Mat_MPIAIJ *)P->data; 7259 PetscCall(MatSetSizes(C, m, n, M, N)); 7260 PetscCall(PetscLayoutSetUp(C->rmap)); 7261 PetscCall(PetscLayoutSetUp(C->cmap)); 7262 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7263 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7264 7265 cp = 0; 7266 switch (ptype) { 7267 case MATPRODUCT_AB: /* A * P */ 7268 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7269 7270 /* A_diag * P_local (merged or not) */ 7271 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7272 /* P is product->B */ 7273 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7274 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7275 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7276 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7277 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7278 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7279 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7280 mp[cp]->product->api_user = product->api_user; 7281 PetscCall(MatProductSetFromOptions(mp[cp])); 7282 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7283 PetscCall(ISGetIndices(glob, &globidx)); 7284 rmapt[cp] = 1; 7285 cmapt[cp] = 2; 7286 cmapa[cp] = globidx; 7287 mptmp[cp] = PETSC_FALSE; 7288 cp++; 7289 } else { /* A_diag * P_diag and A_diag * P_off */ 7290 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7291 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7292 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7293 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7294 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7295 PetscCall(MatAppendOptionsPrefix(mp[cp], 
pprefix)); 7296 mp[cp]->product->api_user = product->api_user; 7297 PetscCall(MatProductSetFromOptions(mp[cp])); 7298 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7299 rmapt[cp] = 1; 7300 cmapt[cp] = 1; 7301 mptmp[cp] = PETSC_FALSE; 7302 cp++; 7303 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7304 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7305 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7306 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7307 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7308 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7309 mp[cp]->product->api_user = product->api_user; 7310 PetscCall(MatProductSetFromOptions(mp[cp])); 7311 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7312 rmapt[cp] = 1; 7313 cmapt[cp] = 2; 7314 cmapa[cp] = p->garray; 7315 mptmp[cp] = PETSC_FALSE; 7316 cp++; 7317 } 7318 7319 /* A_off * P_other */ 7320 if (mmdata->P_oth) { 7321 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7322 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7323 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7324 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7325 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7326 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7327 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7328 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7329 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7330 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7331 mp[cp]->product->api_user = product->api_user; 7332 PetscCall(MatProductSetFromOptions(mp[cp])); 7333 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7334 rmapt[cp] = 1; 7335 cmapt[cp] = 2; 7336 cmapa[cp] = P_oth_idx; 7337 mptmp[cp] = PETSC_FALSE; 7338 cp++; 7339 } 7340 break; 7341 7342 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7343 /* A is product->B */ 7344 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7345 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7346 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7347 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7348 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7349 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7350 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7351 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7352 mp[cp]->product->api_user = product->api_user; 7353 PetscCall(MatProductSetFromOptions(mp[cp])); 7354 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7355 PetscCall(ISGetIndices(glob, &globidx)); 7356 rmapt[cp] = 2; 7357 rmapa[cp] = globidx; 7358 cmapt[cp] = 2; 7359 cmapa[cp] = globidx; 7360 mptmp[cp] = PETSC_FALSE; 7361 cp++; 7362 } else { 7363 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7364 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7365 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7366 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7367 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7368 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7369 mp[cp]->product->api_user = product->api_user; 7370 PetscCall(MatProductSetFromOptions(mp[cp])); 7371 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 
7372 PetscCall(ISGetIndices(glob, &globidx)); 7373 rmapt[cp] = 1; 7374 cmapt[cp] = 2; 7375 cmapa[cp] = globidx; 7376 mptmp[cp] = PETSC_FALSE; 7377 cp++; 7378 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7379 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7380 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7381 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7382 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7383 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7384 mp[cp]->product->api_user = product->api_user; 7385 PetscCall(MatProductSetFromOptions(mp[cp])); 7386 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7387 rmapt[cp] = 2; 7388 rmapa[cp] = p->garray; 7389 cmapt[cp] = 2; 7390 cmapa[cp] = globidx; 7391 mptmp[cp] = PETSC_FALSE; 7392 cp++; 7393 } 7394 break; 7395 case MATPRODUCT_PtAP: 7396 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7397 /* P is product->B */ 7398 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7399 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7400 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7401 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7402 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7403 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7404 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7405 mp[cp]->product->api_user = product->api_user; 7406 PetscCall(MatProductSetFromOptions(mp[cp])); 7407 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7408 PetscCall(ISGetIndices(glob, &globidx)); 7409 rmapt[cp] = 2; 7410 rmapa[cp] = globidx; 7411 cmapt[cp] = 2; 7412 cmapa[cp] = globidx; 7413 mptmp[cp] = PETSC_FALSE; 7414 cp++; 7415 if (mmdata->P_oth) { 7416 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7417 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7418 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7419 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7420 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7421 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7422 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7423 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7424 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7425 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7426 mp[cp]->product->api_user = product->api_user; 7427 PetscCall(MatProductSetFromOptions(mp[cp])); 7428 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7429 mptmp[cp] = PETSC_TRUE; 7430 cp++; 7431 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7432 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7433 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7434 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7435 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7436 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7437 mp[cp]->product->api_user = product->api_user; 7438 PetscCall(MatProductSetFromOptions(mp[cp])); 7439 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7440 rmapt[cp] = 2; 7441 rmapa[cp] = globidx; 7442 cmapt[cp] = 2; 7443 cmapa[cp] = P_oth_idx; 7444 mptmp[cp] = PETSC_FALSE; 7445 cp++; 7446 } 7447 break; 7448 default: 7449 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, 
"Not for product type %s", MatProductTypes[ptype]); 7450 } 7451 /* sanity check */ 7452 if (size > 1) 7453 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7454 7455 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7456 for (i = 0; i < cp; i++) { 7457 mmdata->mp[i] = mp[i]; 7458 mmdata->mptmp[i] = mptmp[i]; 7459 } 7460 mmdata->cp = cp; 7461 C->product->data = mmdata; 7462 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7463 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7464 7465 /* memory type */ 7466 mmdata->mtype = PETSC_MEMTYPE_HOST; 7467 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7468 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7469 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7470 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7471 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7472 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7473 7474 /* prepare coo coordinates for values insertion */ 7475 7476 /* count total nonzeros of those intermediate seqaij Mats 7477 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7478 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7479 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7480 */ 7481 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7482 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7483 if (mptmp[cp]) continue; 7484 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7485 const PetscInt *rmap = rmapa[cp]; 7486 const PetscInt mr = mp[cp]->rmap->n; 7487 const PetscInt rs = C->rmap->rstart; 7488 const PetscInt re = C->rmap->rend; 7489 const PetscInt *ii = mm->i; 7490 for (i = 0; i < mr; i++) { 7491 const PetscInt gr = rmap[i]; 7492 const PetscInt nz = ii[i + 1] - ii[i]; 7493 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7494 else ncoo_oown += nz; /* this row is local */ 7495 } 7496 } else ncoo_d += mm->nz; 7497 } 7498 7499 /* 7500 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7501 7502 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7503 7504 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7505 7506 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7507 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7508 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7509 7510 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7511 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 
7512 */ 7513 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7514 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7515 7516 /* gather (i,j) of nonzeros inserted by remote procs */ 7517 if (hasoffproc) { 7518 PetscSF msf; 7519 PetscInt ncoo2, *coo_i2, *coo_j2; 7520 7521 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7522 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7523 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7524 7525 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7526 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7527 PetscInt *idxoff = mmdata->off[cp]; 7528 PetscInt *idxown = mmdata->own[cp]; 7529 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7530 const PetscInt *rmap = rmapa[cp]; 7531 const PetscInt *cmap = cmapa[cp]; 7532 const PetscInt *ii = mm->i; 7533 PetscInt *coi = coo_i + ncoo_o; 7534 PetscInt *coj = coo_j + ncoo_o; 7535 const PetscInt mr = mp[cp]->rmap->n; 7536 const PetscInt rs = C->rmap->rstart; 7537 const PetscInt re = C->rmap->rend; 7538 const PetscInt cs = C->cmap->rstart; 7539 for (i = 0; i < mr; i++) { 7540 const PetscInt *jj = mm->j + ii[i]; 7541 const PetscInt gr = rmap[i]; 7542 const PetscInt nz = ii[i + 1] - ii[i]; 7543 if (gr < rs || gr >= re) { /* this is an offproc row */ 7544 for (j = ii[i]; j < ii[i + 1]; j++) { 7545 *coi++ = gr; 7546 *idxoff++ = j; 7547 } 7548 if (!cmapt[cp]) { /* already global */ 7549 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7550 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7551 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7552 } else { /* offdiag */ 7553 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7554 } 7555 ncoo_o += nz; 7556 } else { /* this is a local row */ 7557 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7558 } 7559 } 7560 } 7561 mmdata->off[cp + 1] = idxoff; 7562 mmdata->own[cp + 1] = idxown; 7563 } 7564 7565 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7566 PetscInt incoo_o; 7567 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7568 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7569 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7570 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7571 ncoo = ncoo_d + ncoo_oown + ncoo2; 7572 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7573 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7574 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7575 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7576 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7577 PetscCall(PetscFree2(coo_i, coo_j)); 7578 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7579 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7580 coo_i = coo_i2; 7581 coo_j = coo_j2; 7582 } else { /* no offproc values insertion */ 7583 ncoo = ncoo_d; 7584 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7585 7586 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7587 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7588 PetscCall(PetscSFSetUp(mmdata->sf)); 7589 } 7590 
mmdata->hasoffproc = hasoffproc; 7591 7592 /* gather (i,j) of nonzeros inserted locally */ 7593 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7594 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7595 PetscInt *coi = coo_i + ncoo_d; 7596 PetscInt *coj = coo_j + ncoo_d; 7597 const PetscInt *jj = mm->j; 7598 const PetscInt *ii = mm->i; 7599 const PetscInt *cmap = cmapa[cp]; 7600 const PetscInt *rmap = rmapa[cp]; 7601 const PetscInt mr = mp[cp]->rmap->n; 7602 const PetscInt rs = C->rmap->rstart; 7603 const PetscInt re = C->rmap->rend; 7604 const PetscInt cs = C->cmap->rstart; 7605 7606 if (mptmp[cp]) continue; 7607 if (rmapt[cp] == 1) { /* consecutive rows */ 7608 /* fill coo_i */ 7609 for (i = 0; i < mr; i++) { 7610 const PetscInt gr = i + rs; 7611 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7612 } 7613 /* fill coo_j */ 7614 if (!cmapt[cp]) { /* type-0, already global */ 7615 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7616 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7617 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7618 } else { /* type-2, local to global for sparse columns */ 7619 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7620 } 7621 ncoo_d += mm->nz; 7622 } else if (rmapt[cp] == 2) { /* sparse rows */ 7623 for (i = 0; i < mr; i++) { 7624 const PetscInt *jj = mm->j + ii[i]; 7625 const PetscInt gr = rmap[i]; 7626 const PetscInt nz = ii[i + 1] - ii[i]; 7627 if (gr >= rs && gr < re) { /* local rows */ 7628 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7629 if (!cmapt[cp]) { /* type-0, already global */ 7630 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7631 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7632 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7633 } else { /* type-2, local to global for sparse columns */ 7634 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7635 } 7636 ncoo_d += nz; 7637 } 7638 } 7639 } 7640 } 7641 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7642 PetscCall(ISDestroy(&glob)); 7643 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7644 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7645 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7646 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7647 7648 /* preallocate with COO data */ 7649 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7650 PetscCall(PetscFree2(coo_i, coo_j)); 7651 PetscFunctionReturn(PETSC_SUCCESS); 7652 } 7653 7654 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7655 { 7656 Mat_Product *product = mat->product; 7657 #if defined(PETSC_HAVE_DEVICE) 7658 PetscBool match = PETSC_FALSE; 7659 PetscBool usecpu = PETSC_FALSE; 7660 #else 7661 PetscBool match = PETSC_TRUE; 7662 #endif 7663 7664 PetscFunctionBegin; 7665 MatCheckProduct(mat, 1); 7666 #if defined(PETSC_HAVE_DEVICE) 7667 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7668 if (match) { /* we can always fallback to the CPU if requested */ 7669 switch (product->type) { 7670 case MATPRODUCT_AB: 7671 if (product->api_user) { 7672 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7673 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7674 
PetscOptionsEnd(); 7675 } else { 7676 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7677 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7678 PetscOptionsEnd(); 7679 } 7680 break; 7681 case MATPRODUCT_AtB: 7682 if (product->api_user) { 7683 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7684 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7685 PetscOptionsEnd(); 7686 } else { 7687 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7688 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7689 PetscOptionsEnd(); 7690 } 7691 break; 7692 case MATPRODUCT_PtAP: 7693 if (product->api_user) { 7694 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7695 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7696 PetscOptionsEnd(); 7697 } else { 7698 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7699 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7700 PetscOptionsEnd(); 7701 } 7702 break; 7703 default: 7704 break; 7705 } 7706 match = (PetscBool)!usecpu; 7707 } 7708 #endif 7709 if (match) { 7710 switch (product->type) { 7711 case MATPRODUCT_AB: 7712 case MATPRODUCT_AtB: 7713 case MATPRODUCT_PtAP: 7714 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7715 break; 7716 default: 7717 break; 7718 } 7719 } 7720 /* fallback to MPIAIJ ops */ 7721 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7722 PetscFunctionReturn(PETSC_SUCCESS); 7723 } 7724 7725 /* 7726 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7727 7728 n - the number of block indices in cc[] 7729 cc - the block indices (must be large enough to contain the indices) 7730 */ 7731 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7732 { 7733 PetscInt cnt = -1, nidx, j; 7734 const PetscInt *idx; 7735 7736 PetscFunctionBegin; 7737 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7738 if (nidx) { 7739 cnt = 0; 7740 cc[cnt] = idx[0] / bs; 7741 for (j = 1; j < nidx; j++) { 7742 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7743 } 7744 } 7745 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7746 *n = cnt + 1; 7747 PetscFunctionReturn(PETSC_SUCCESS); 7748 } 7749 7750 /* 7751 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7752 7753 ncollapsed - the number of block indices 7754 collapsed - the block indices (must be large enough to contain the indices) 7755 */ 7756 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7757 { 7758 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7759 7760 PetscFunctionBegin; 7761 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7762 for (i = start + 1; i < start + bs; i++) { 7763 
PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7764 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7765 cprevtmp = cprev; 7766 cprev = merged; 7767 merged = cprevtmp; 7768 } 7769 *ncollapsed = nprev; 7770 if (collapsed) *collapsed = cprev; 7771 PetscFunctionReturn(PETSC_SUCCESS); 7772 } 7773 7774 /* 7775 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7776 7777 Input Parameter: 7778 . Amat - matrix 7779 - symmetrize - make the result symmetric 7780 + scale - scale with diagonal 7781 7782 Output Parameter: 7783 . a_Gmat - output scalar graph >= 0 7784 7785 */ 7786 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7787 { 7788 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7789 MPI_Comm comm; 7790 Mat Gmat; 7791 PetscBool ismpiaij, isseqaij; 7792 Mat a, b, c; 7793 MatType jtype; 7794 7795 PetscFunctionBegin; 7796 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7797 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7798 PetscCall(MatGetSize(Amat, &MM, &NN)); 7799 PetscCall(MatGetBlockSize(Amat, &bs)); 7800 nloc = (Iend - Istart) / bs; 7801 7802 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7803 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7804 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7805 7806 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7807 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7808 implementation */ 7809 if (bs > 1) { 7810 PetscCall(MatGetType(Amat, &jtype)); 7811 PetscCall(MatCreate(comm, &Gmat)); 7812 PetscCall(MatSetType(Gmat, jtype)); 7813 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7814 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7815 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7816 PetscInt *d_nnz, *o_nnz; 7817 MatScalar *aa, val, *AA; 7818 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7819 7820 if (isseqaij) { 7821 a = Amat; 7822 b = NULL; 7823 } else { 7824 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7825 a = d->A; 7826 b = d->B; 7827 } 7828 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7829 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7830 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7831 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7832 const PetscInt *cols1, *cols2; 7833 7834 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7835 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7836 nnz[brow / bs] = nc2 / bs; 7837 if (nc2 % bs) ok = 0; 7838 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7839 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7840 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7841 if (nc1 != nc2) ok = 0; 7842 else { 7843 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7844 if (cols1[jj] != cols2[jj]) ok = 0; 7845 if (cols1[jj] % bs != jj % bs) ok = 0; 7846 } 7847 } 7848 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7849 } 7850 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7851 if (!ok) { 7852 PetscCall(PetscFree2(d_nnz, o_nnz)); 7853 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7854 goto old_bs; 7855 } 7856 } 7857 } 7858 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7859 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7860 PetscCall(PetscFree2(d_nnz, o_nnz)); 7861 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7862 // diag 7863 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7864 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7865 7866 ai = aseq->i; 7867 n = ai[brow + 1] - ai[brow]; 7868 aj = aseq->j + ai[brow]; 7869 for (PetscInt k = 0; k < n; k += bs) { // block columns 7870 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7871 val = 0; 7872 if (index_size == 0) { 7873 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7874 aa = aseq->a + ai[brow + ii] + k; 7875 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7876 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7877 } 7878 } 7879 } else { // use (index,index) value if provided 7880 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7881 PetscInt ii = index[iii]; 7882 aa = aseq->a + ai[brow + ii] + k; 7883 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7884 PetscInt jj = index[jjj]; 7885 val += PetscAbs(PetscRealPart(aa[jj])); 7886 } 7887 } 7888 } 7889 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7890 AA[k / bs] = val; 7891 } 7892 grow = Istart / bs + brow / bs; 7893 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7894 } 7895 // off-diag 7896 if (ismpiaij) { 7897 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7898 const PetscScalar *vals; 7899 const PetscInt *cols, *garray = aij->garray; 7900 7901 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7902 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7903 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7904 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7905 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7906 AA[k / bs] = 0; 7907 AJ[cidx] = garray[cols[k]] / bs; 7908 } 7909 nc = ncols / bs; 7910 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7911 if (index_size == 0) { 7912 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7913 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7914 for (PetscInt k = 0; k < ncols; k += bs) { 7915 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7916 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7917 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + 
jj])); 7918 } 7919 } 7920 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7921 } 7922 } else { // use (index,index) value if provided 7923 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7924 PetscInt ii = index[iii]; 7925 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7926 for (PetscInt k = 0; k < ncols; k += bs) { 7927 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7928 PetscInt jj = index[jjj]; 7929 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7930 } 7931 } 7932 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7933 } 7934 } 7935 grow = Istart / bs + brow / bs; 7936 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7937 } 7938 } 7939 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7940 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7941 PetscCall(PetscFree2(AA, AJ)); 7942 } else { 7943 const PetscScalar *vals; 7944 const PetscInt *idx; 7945 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7946 old_bs: 7947 /* 7948 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7949 */ 7950 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7951 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7952 if (isseqaij) { 7953 PetscInt max_d_nnz; 7954 7955 /* 7956 Determine exact preallocation count for (sequential) scalar matrix 7957 */ 7958 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7959 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7960 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7961 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7962 PetscCall(PetscFree3(w0, w1, w2)); 7963 } else if (ismpiaij) { 7964 Mat Daij, Oaij; 7965 const PetscInt *garray; 7966 PetscInt max_d_nnz; 7967 7968 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7969 /* 7970 Determine exact preallocation count for diagonal block portion of scalar matrix 7971 */ 7972 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7973 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7974 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7975 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7976 PetscCall(PetscFree3(w0, w1, w2)); 7977 /* 7978 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7979 */ 7980 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7981 o_nnz[jj] = 0; 7982 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7983 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7984 o_nnz[jj] += ncols; 7985 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7986 } 7987 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7988 } 7989 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7990 /* get scalar copy (norms) of matrix */ 7991 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7992 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7993 PetscCall(PetscFree2(d_nnz, o_nnz)); 7994 for (Ii = Istart; Ii < Iend; Ii++) { 7995 PetscInt dest_row = Ii / bs; 7996 7997 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7998 for (jj = 0; jj < ncols; jj++) { 7999 PetscInt dest_col = idx[jj] / bs; 8000 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 8001 8002 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, 
ADD_VALUES)); 8003 } 8004 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 8005 } 8006 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8007 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8008 } 8009 } else { 8010 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8011 else { 8012 Gmat = Amat; 8013 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8014 } 8015 if (isseqaij) { 8016 a = Gmat; 8017 b = NULL; 8018 } else { 8019 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8020 a = d->A; 8021 b = d->B; 8022 } 8023 if (filter >= 0 || scale) { 8024 /* take absolute value of each entry */ 8025 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8026 MatInfo info; 8027 PetscScalar *avals; 8028 8029 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8030 PetscCall(MatSeqAIJGetArray(c, &avals)); 8031 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8032 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8033 } 8034 } 8035 } 8036 if (symmetrize) { 8037 PetscBool isset, issym; 8038 8039 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8040 if (!isset || !issym) { 8041 Mat matTrans; 8042 8043 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8044 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8045 PetscCall(MatDestroy(&matTrans)); 8046 } 8047 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8048 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8049 if (scale) { 8050 /* scale c for all diagonal values = 1 or -1 */ 8051 Vec diag; 8052 8053 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8054 PetscCall(MatGetDiagonal(Gmat, diag)); 8055 PetscCall(VecReciprocal(diag)); 8056 PetscCall(VecSqrtAbs(diag)); 8057 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8058 PetscCall(VecDestroy(&diag)); 8059 } 8060 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8061 if (filter >= 0) { 8062 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8063 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8064 } 8065 *a_Gmat = Gmat; 8066 PetscFunctionReturn(PETSC_SUCCESS); 8067 } 8068 8069 /* 8070 Special version for direct calls from Fortran 8071 */ 8072 8073 /* Change these macros so can be used in void function */ 8074 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8075 #undef PetscCall 8076 #define PetscCall(...) \ 8077 do { \ 8078 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8079 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8080 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8081 return; \ 8082 } \ 8083 } while (0) 8084 8085 #undef SETERRQ 8086 #define SETERRQ(comm, ierr, ...) 
\ 8087 do { \ 8088 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8089 return; \ 8090 } while (0) 8091 8092 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8093 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8094 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8095 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8096 #else 8097 #endif 8098 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8099 { 8100 Mat mat = *mmat; 8101 PetscInt m = *mm, n = *mn; 8102 InsertMode addv = *maddv; 8103 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8104 PetscScalar value; 8105 8106 MatCheckPreallocated(mat, 1); 8107 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8108 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8109 { 8110 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8111 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8112 PetscBool roworiented = aij->roworiented; 8113 8114 /* Some Variables required in the macro */ 8115 Mat A = aij->A; 8116 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8117 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8118 MatScalar *aa; 8119 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8120 Mat B = aij->B; 8121 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8122 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8123 MatScalar *ba; 8124 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8125 * cannot use "#if defined" inside a macro. 
*/ 8126 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8127 8128 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8129 PetscInt nonew = a->nonew; 8130 MatScalar *ap1, *ap2; 8131 8132 PetscFunctionBegin; 8133 PetscCall(MatSeqAIJGetArray(A, &aa)); 8134 PetscCall(MatSeqAIJGetArray(B, &ba)); 8135 for (i = 0; i < m; i++) { 8136 if (im[i] < 0) continue; 8137 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8138 if (im[i] >= rstart && im[i] < rend) { 8139 row = im[i] - rstart; 8140 lastcol1 = -1; 8141 rp1 = aj + ai[row]; 8142 ap1 = aa + ai[row]; 8143 rmax1 = aimax[row]; 8144 nrow1 = ailen[row]; 8145 low1 = 0; 8146 high1 = nrow1; 8147 lastcol2 = -1; 8148 rp2 = bj + bi[row]; 8149 ap2 = ba + bi[row]; 8150 rmax2 = bimax[row]; 8151 nrow2 = bilen[row]; 8152 low2 = 0; 8153 high2 = nrow2; 8154 8155 for (j = 0; j < n; j++) { 8156 if (roworiented) value = v[i * n + j]; 8157 else value = v[i + j * m]; 8158 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8159 if (in[j] >= cstart && in[j] < cend) { 8160 col = in[j] - cstart; 8161 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8162 } else if (in[j] < 0) continue; 8163 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8164 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8165 } else { 8166 if (mat->was_assembled) { 8167 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8168 #if defined(PETSC_USE_CTABLE) 8169 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8170 col--; 8171 #else 8172 col = aij->colmap[in[j]] - 1; 8173 #endif 8174 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8175 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8176 col = in[j]; 8177 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8178 B = aij->B; 8179 b = (Mat_SeqAIJ *)B->data; 8180 bimax = b->imax; 8181 bi = b->i; 8182 bilen = b->ilen; 8183 bj = b->j; 8184 rp2 = bj + bi[row]; 8185 ap2 = ba + bi[row]; 8186 rmax2 = bimax[row]; 8187 nrow2 = bilen[row]; 8188 low2 = 0; 8189 high2 = nrow2; 8190 bm = aij->B->rmap->n; 8191 ba = b->a; 8192 inserted = PETSC_FALSE; 8193 } 8194 } else col = in[j]; 8195 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8196 } 8197 } 8198 } else if (!aij->donotstash) { 8199 if (roworiented) { 8200 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8201 } else { 8202 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8203 } 8204 } 8205 } 8206 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8207 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8208 } 8209 PetscFunctionReturnVoid(); 8210 } 8211 8212 /* Undefining these here since they were redefined from their original definition above! No 8213 * other PETSc functions should be defined past this point, as it is impossible to recover the 8214 * original definitions */ 8215 #undef PetscCall 8216 #undef SETERRQ 8217