#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. This matrix type also
   automatically switches over to using inodes when enough of them exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/
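/*
  A minimal usage sketch (not part of the manual page above) illustrating the recommendation to call
  both preallocation routines so that the same code works whether the communicator has one process or
  many. The local/global sizes m, n, M, N and the per-row nonzero estimates (5 and 2) are placeholder
  values chosen purely for illustration.

    Mat A;

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, m, n, M, N));
    PetscCall(MatSetType(A, MATAIJ));                          // or -mat_type aij with MatSetFromOptions()
    PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          // used when the communicator has one process
    PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); // used when it has more than one process
    // ... MatSetValues() loop ...
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/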
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;
  PetscMPIInt        in;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
reduction type"); 312 PetscCall(PetscMPIIntCast(n, &in)); 313 if (type == NORM_INFINITY) { 314 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 315 } else { 316 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 317 } 318 PetscCall(PetscFree(work)); 319 if (type == NORM_2) { 320 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 321 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 322 for (i = 0; i < n; i++) reductions[i] /= m; 323 } 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 328 { 329 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 330 IS sis, gis; 331 const PetscInt *isis, *igis; 332 PetscInt n, *iis, nsis, ngis, rstart, i; 333 334 PetscFunctionBegin; 335 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 336 PetscCall(MatFindNonzeroRows(a->B, &gis)); 337 PetscCall(ISGetSize(gis, &ngis)); 338 PetscCall(ISGetSize(sis, &nsis)); 339 PetscCall(ISGetIndices(sis, &isis)); 340 PetscCall(ISGetIndices(gis, &igis)); 341 342 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 343 PetscCall(PetscArraycpy(iis, igis, ngis)); 344 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 345 n = ngis + nsis; 346 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 347 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 348 for (i = 0; i < n; i++) iis[i] += rstart; 349 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 350 351 PetscCall(ISRestoreIndices(sis, &isis)); 352 PetscCall(ISRestoreIndices(gis, &igis)); 353 PetscCall(ISDestroy(&sis)); 354 PetscCall(ISDestroy(&gis)); 355 PetscFunctionReturn(PETSC_SUCCESS); 356 } 357 358 /* 359 Local utility routine that creates a mapping from the global column 360 number to the local number in the off-diagonal part of the local 361 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 362 a slightly higher hash table cost; without it it is not scalable (each processor 363 has an order N integer array but is fast to access. 
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
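/*
  Illustration (not code from this file): with ownership range [cstart, cend) = [4, 8) and a sorted
  CSR row containing global columns {1, 4, 6, 9}, the routine below places 4 and 6 in the diagonal
  block (stored as local columns 0 and 2) and 1 and 9 in the off-diagonal block (stored with their
  global column numbers at this stage).
*/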
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layouts don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) {
    if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX;
    else header[3] = (PetscInt)hnz;
  }
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
mat->cmap->N : 0, 0, 1, &iscol)); 1336 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1338 /* The commented code uses MatCreateSubMatrices instead */ 1339 /* 1340 Mat *AA, A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1345 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1346 if (rank == 0) { 1347 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1348 A = AA[0]; 1349 Av = AA[0]; 1350 } 1351 PetscCall(MatDestroySubMatrices(1,&AA)); 1352 */ 1353 PetscCall(ISDestroy(&iscol)); 1354 PetscCall(ISDestroy(&isrow)); 1355 /* 1356 Everyone has to call to draw the matrix since the graphics waits are 1357 synchronized across all processors that share the PetscDraw object 1358 */ 1359 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1360 if (rank == 0) { 1361 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1362 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1363 } 1364 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1365 PetscCall(MatDestroy(&A)); 1366 } 1367 PetscFunctionReturn(PETSC_SUCCESS); 1368 } 1369 1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1371 { 1372 PetscBool iascii, isdraw, issocket, isbinary; 1373 1374 PetscFunctionBegin; 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1377 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1378 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1379 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1384 { 1385 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1386 Vec bb1 = NULL; 1387 PetscBool hasop; 1388 1389 PetscFunctionBegin; 1390 if (flag == SOR_APPLY_UPPER) { 1391 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1396 1397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1398 if (flag & SOR_ZERO_INITIAL_GUESS) { 1399 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1400 its--; 1401 } 1402 1403 while (its--) { 1404 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1405 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1406 1407 /* update rhs: bb1 = bb - B*x */ 1408 PetscCall(VecScale(mat->lvec, -1.0)); 1409 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1410 1411 /* local sweep */ 1412 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1413 } 1414 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1417 its--; 1418 } 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 1439 /* update rhs: bb1 = bb - B*x */ 1440 PetscCall(VecScale(mat->lvec, -1.0)); 1441 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1442 1443 /* local sweep */ 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1445 } 1446 } else if (flag & SOR_EISENSTAT) { 1447 Vec xx1; 1448 1449 PetscCall(VecDuplicate(bb, &xx1)); 1450 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1451 1452 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 if (!mat->diag) { 1455 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1456 PetscCall(MatGetDiagonal(matin, mat->diag)); 1457 } 1458 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1459 if (hasop) { 1460 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1461 } else { 1462 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1463 } 1464 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1465 1466 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1467 1468 /* local sweep */ 1469 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1470 PetscCall(VecAXPY(xx, 1.0, xx1)); 1471 PetscCall(VecDestroy(&xx1)); 1472 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1473 1474 PetscCall(VecDestroy(&bb1)); 1475 1476 matin->factorerrortype = mat->A->factorerrortype; 1477 PetscFunctionReturn(PETSC_SUCCESS); 1478 } 1479 1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1481 { 1482 Mat aA, aB, Aperm; 1483 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1484 PetscScalar *aa, *ba; 1485 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1486 PetscSF rowsf, sf; 1487 IS parcolp = NULL; 1488 PetscBool done; 1489 1490 PetscFunctionBegin; 1491 PetscCall(MatGetLocalSize(A, &m, &n)); 1492 PetscCall(ISGetIndices(rowp, &rwant)); 1493 PetscCall(ISGetIndices(colp, &cwant)); 1494 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1495 1496 /* Invert row permutation to find out where my rows should go */ 1497 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1498 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1499 PetscCall(PetscSFSetFromOptions(rowsf)); 1500 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1501 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1502 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1503 1504 /* Invert column permutation to find out where my columns should go */ 1505 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1506 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1507 PetscCall(PetscSFSetFromOptions(sf)); 1508 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1509 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1510 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1511 PetscCall(PetscSFDestroy(&sf)); 1512 1513 PetscCall(ISRestoreIndices(rowp, &rwant)); 1514 PetscCall(ISRestoreIndices(colp, &cwant)); 1515 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1516 1517 /* Find out where my gcols should go */ 1518 PetscCall(MatGetSize(aB, NULL, &ng)); 1519 PetscCall(PetscMalloc1(ng, &gcdest)); 1520 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1521 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1522 PetscCall(PetscSFSetFromOptions(sf)); 1523 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1524 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1525 PetscCall(PetscSFDestroy(&sf)); 1526 1527 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1528 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1529 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1530 for (i = 0; i < m; i++) { 1531 PetscInt row = rdest[i]; 1532 PetscMPIInt rowner; 1533 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1534 for (j = ai[i]; j < ai[i + 1]; j++) { 1535 PetscInt col = cdest[aj[j]]; 1536 PetscMPIInt cowner; 1537 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1538 if (rowner == cowner) dnnz[i]++; 1539 else onnz[i]++; 1540 } 1541 for (j = bi[i]; j < bi[i + 1]; j++) { 1542 PetscInt col = gcdest[bj[j]]; 1543 PetscMPIInt cowner; 1544 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1545 if (rowner == cowner) dnnz[i]++; 1546 else onnz[i]++; 1547 } 1548 } 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1551 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1552 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1553 PetscCall(PetscSFDestroy(&rowsf)); 1554 1555 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1556 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1557 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1558 for (i = 0; i < m; i++) { 1559 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1560 PetscInt j0, rowlen; 1561 rowlen = ai[i + 1] - ai[i]; 1562 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1563 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1564 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1565 } 1566 rowlen = bi[i + 1] - bi[i]; 1567 for (j0 = j = 0; j < rowlen; j0 = j) { 1568 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1569 
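/* as for the diagonal block above, insert this batch of permuted off-diagonal entries (global columns obtained via gcdest) */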
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1570 } 1571 } 1572 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1573 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1574 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1575 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1576 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1577 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1578 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1579 PetscCall(PetscFree3(work, rdest, cdest)); 1580 PetscCall(PetscFree(gcdest)); 1581 if (parcolp) PetscCall(ISDestroy(&colp)); 1582 *B = Aperm; 1583 PetscFunctionReturn(PETSC_SUCCESS); 1584 } 1585 1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1587 { 1588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1589 1590 PetscFunctionBegin; 1591 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1592 if (ghosts) *ghosts = aij->garray; 1593 PetscFunctionReturn(PETSC_SUCCESS); 1594 } 1595 1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1597 { 1598 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1599 Mat A = mat->A, B = mat->B; 1600 PetscLogDouble isend[5], irecv[5]; 1601 1602 PetscFunctionBegin; 1603 info->block_size = 1.0; 1604 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1605 1606 isend[0] = info->nz_used; 1607 isend[1] = info->nz_allocated; 1608 isend[2] = info->nz_unneeded; 1609 isend[3] = info->memory; 1610 isend[4] = info->mallocs; 1611 1612 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1613 1614 isend[0] += info->nz_used; 1615 isend[1] += info->nz_allocated; 1616 isend[2] += info->nz_unneeded; 1617 isend[3] += info->memory; 1618 isend[4] += info->mallocs; 1619 if (flag == MAT_LOCAL) { 1620 info->nz_used = isend[0]; 1621 info->nz_allocated = isend[1]; 1622 info->nz_unneeded = isend[2]; 1623 info->memory = isend[3]; 1624 info->mallocs = isend[4]; 1625 } else if (flag == MAT_GLOBAL_MAX) { 1626 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1627 1628 info->nz_used = irecv[0]; 1629 info->nz_allocated = irecv[1]; 1630 info->nz_unneeded = irecv[2]; 1631 info->memory = irecv[3]; 1632 info->mallocs = irecv[4]; 1633 } else if (flag == MAT_GLOBAL_SUM) { 1634 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } 1642 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1643 info->fill_ratio_needed = 0; 1644 info->factor_mallocs = 0; 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1651 1652 PetscFunctionBegin; 1653 switch (op) { 1654 case MAT_NEW_NONZERO_LOCATIONS: 1655 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1656 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1657 case MAT_KEEP_NONZERO_PATTERN: 1658 case MAT_NEW_NONZERO_LOCATION_ERR: 1659 case MAT_USE_INODES: 1660 case MAT_IGNORE_ZERO_ENTRIES: 1661 case MAT_FORM_EXPLICIT_TRANSPOSE: 1662 MatCheckPreallocated(A, 1); 1663 PetscCall(MatSetOption(a->A, op, flg)); 1664 PetscCall(MatSetOption(a->B, op, flg)); 1665 break; 1666 case MAT_ROW_ORIENTED: 1667 MatCheckPreallocated(A, 1); 1668 a->roworiented = flg; 1669 1670 
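/* propagate the orientation to both the diagonal (A) and off-diagonal (B) sequential blocks */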
PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_FORCE_DIAGONAL_ENTRIES: 1674 case MAT_SORTED_FULL: 1675 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1676 break; 1677 case MAT_IGNORE_OFF_PROC_ENTRIES: 1678 a->donotstash = flg; 1679 break; 1680 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1681 case MAT_SPD: 1682 case MAT_SYMMETRIC: 1683 case MAT_STRUCTURALLY_SYMMETRIC: 1684 case MAT_HERMITIAN: 1685 case MAT_SYMMETRY_ETERNAL: 1686 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1687 case MAT_SPD_ETERNAL: 1688 /* if the diagonal matrix is square it inherits some of the properties above */ 1689 break; 1690 case MAT_SUBMAT_SINGLEIS: 1691 A->submat_singleis = flg; 1692 break; 1693 case MAT_STRUCTURE_ONLY: 1694 /* The option is handled directly by MatSetOption() */ 1695 break; 1696 default: 1697 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1698 } 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1703 { 1704 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1705 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1706 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1707 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1708 PetscInt *cmap, *idx_p; 1709 1710 PetscFunctionBegin; 1711 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1712 mat->getrowactive = PETSC_TRUE; 1713 1714 if (!mat->rowvalues && (idx || v)) { 1715 /* 1716 allocate enough space to hold information from the longest row. 1717 */ 1718 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1719 PetscInt max = 1, tmp; 1720 for (i = 0; i < matin->rmap->n; i++) { 1721 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1722 if (max < tmp) max = tmp; 1723 } 1724 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1725 } 1726 1727 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1728 lrow = row - rstart; 1729 1730 pvA = &vworkA; 1731 pcA = &cworkA; 1732 pvB = &vworkB; 1733 pcB = &cworkB; 1734 if (!v) { 1735 pvA = NULL; 1736 pvB = NULL; 1737 } 1738 if (!idx) { 1739 pcA = NULL; 1740 if (!v) pcB = NULL; 1741 } 1742 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1743 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1744 nztot = nzA + nzB; 1745 1746 cmap = mat->garray; 1747 if (v || idx) { 1748 if (nztot) { 1749 /* Sort by increasing column numbers, assuming A and B already sorted */ 1750 PetscInt imark = -1; 1751 if (v) { 1752 *v = v_p = mat->rowvalues; 1753 for (i = 0; i < nzB; i++) { 1754 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1755 else break; 1756 } 1757 imark = i; 1758 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1759 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1760 } 1761 if (idx) { 1762 *idx = idx_p = mat->rowindices; 1763 if (imark > -1) { 1764 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1765 } else { 1766 for (i = 0; i < nzB; i++) { 1767 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1768 else break; 1769 } 1770 imark = i; 1771 } 1772 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1773 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1774 } 1775 } else { 1776 if 
(idx) *idx = NULL; 1777 if (v) *v = NULL; 1778 } 1779 } 1780 *nz = nztot; 1781 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1782 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1787 { 1788 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1789 1790 PetscFunctionBegin; 1791 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1792 aij->getrowactive = PETSC_FALSE; 1793 PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1800 PetscInt i, j, cstart = mat->cmap->rstart; 1801 PetscReal sum = 0.0; 1802 const MatScalar *v, *amata, *bmata; 1803 PetscMPIInt iN; 1804 1805 PetscFunctionBegin; 1806 if (aij->size == 1) { 1807 PetscCall(MatNorm(aij->A, type, norm)); 1808 } else { 1809 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1810 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1811 if (type == NORM_FROBENIUS) { 1812 v = amata; 1813 for (i = 0; i < amat->nz; i++) { 1814 sum += PetscRealPart(PetscConj(*v) * (*v)); 1815 v++; 1816 } 1817 v = bmata; 1818 for (i = 0; i < bmat->nz; i++) { 1819 sum += PetscRealPart(PetscConj(*v) * (*v)); 1820 v++; 1821 } 1822 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1823 *norm = PetscSqrtReal(*norm); 1824 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1825 } else if (type == NORM_1) { /* max column norm */ 1826 PetscReal *tmp, *tmp2; 1827 PetscInt *jj, *garray = aij->garray; 1828 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1829 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1830 *norm = 0.0; 1831 v = amata; 1832 jj = amat->j; 1833 for (j = 0; j < amat->nz; j++) { 1834 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1835 v++; 1836 } 1837 v = bmata; 1838 jj = bmat->j; 1839 for (j = 0; j < bmat->nz; j++) { 1840 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1841 v++; 1842 } 1843 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 1844 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1845 for (j = 0; j < mat->cmap->N; j++) { 1846 if (tmp2[j] > *norm) *norm = tmp2[j]; 1847 } 1848 PetscCall(PetscFree(tmp)); 1849 PetscCall(PetscFree(tmp2)); 1850 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1851 } else if (type == NORM_INFINITY) { /* max row norm */ 1852 PetscReal ntemp = 0.0; 1853 for (j = 0; j < aij->A->rmap->n; j++) { 1854 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1855 sum = 0.0; 1856 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1857 sum += PetscAbsScalar(*v); 1858 v++; 1859 } 1860 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1861 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1862 sum += PetscAbsScalar(*v); 1863 v++; 1864 } 1865 if (sum > ntemp) ntemp = sum; 1866 } 1867 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1868 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1869 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1870 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1871 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, 
&bmata)); 1872 } 1873 PetscFunctionReturn(PETSC_SUCCESS); 1874 } 1875 1876 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1877 { 1878 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1879 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1880 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1881 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1882 Mat B, A_diag, *B_diag; 1883 const MatScalar *pbv, *bv; 1884 1885 PetscFunctionBegin; 1886 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1887 ma = A->rmap->n; 1888 na = A->cmap->n; 1889 mb = a->B->rmap->n; 1890 nb = a->B->cmap->n; 1891 ai = Aloc->i; 1892 aj = Aloc->j; 1893 bi = Bloc->i; 1894 bj = Bloc->j; 1895 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1896 PetscInt *d_nnz, *g_nnz, *o_nnz; 1897 PetscSFNode *oloc; 1898 PETSC_UNUSED PetscSF sf; 1899 1900 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1901 /* compute d_nnz for preallocation */ 1902 PetscCall(PetscArrayzero(d_nnz, na)); 1903 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1904 /* compute local off-diagonal contributions */ 1905 PetscCall(PetscArrayzero(g_nnz, nb)); 1906 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1907 /* map those to global */ 1908 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1909 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1910 PetscCall(PetscSFSetFromOptions(sf)); 1911 PetscCall(PetscArrayzero(o_nnz, na)); 1912 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1913 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1914 PetscCall(PetscSFDestroy(&sf)); 1915 1916 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1917 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1918 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1919 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1920 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1921 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1922 } else { 1923 B = *matout; 1924 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1925 } 1926 1927 b = (Mat_MPIAIJ *)B->data; 1928 A_diag = a->A; 1929 B_diag = &b->A; 1930 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1931 A_diag_ncol = A_diag->cmap->N; 1932 B_diag_ilen = sub_B_diag->ilen; 1933 B_diag_i = sub_B_diag->i; 1934 1935 /* Set ilen for diagonal of B */ 1936 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1937 1938 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1939 very quickly (=without using MatSetValues), because all writes are local. 
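The off-diagonal block below must instead go through MatSetValues() followed by matrix assembly, since its transposed entries generally belong to rows owned by other processes.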
*/ 1940 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1941 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1942 1943 /* copy over the B part */ 1944 PetscCall(PetscMalloc1(bi[mb], &cols)); 1945 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1946 pbv = bv; 1947 row = A->rmap->rstart; 1948 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1949 cols_tmp = cols; 1950 for (i = 0; i < mb; i++) { 1951 ncol = bi[i + 1] - bi[i]; 1952 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1953 row++; 1954 if (pbv) pbv += ncol; 1955 if (cols_tmp) cols_tmp += ncol; 1956 } 1957 PetscCall(PetscFree(cols)); 1958 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1959 1960 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1961 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1962 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1963 *matout = B; 1964 } else { 1965 PetscCall(MatHeaderMerge(A, &B)); 1966 } 1967 PetscFunctionReturn(PETSC_SUCCESS); 1968 } 1969 1970 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1971 { 1972 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1973 Mat a = aij->A, b = aij->B; 1974 PetscInt s1, s2, s3; 1975 1976 PetscFunctionBegin; 1977 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1978 if (rr) { 1979 PetscCall(VecGetLocalSize(rr, &s1)); 1980 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1981 /* Overlap communication with computation. */ 1982 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1983 } 1984 if (ll) { 1985 PetscCall(VecGetLocalSize(ll, &s1)); 1986 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1987 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1988 } 1989 /* scale the diagonal block */ 1990 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1991 1992 if (rr) { 1993 /* Do a scatter end and then right scale the off-diagonal block */ 1994 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1995 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1996 } 1997 PetscFunctionReturn(PETSC_SUCCESS); 1998 } 1999 2000 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2001 { 2002 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2003 2004 PetscFunctionBegin; 2005 PetscCall(MatSetUnfactored(a->A)); 2006 PetscFunctionReturn(PETSC_SUCCESS); 2007 } 2008 2009 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2010 { 2011 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2012 Mat a, b, c, d; 2013 PetscBool flg; 2014 2015 PetscFunctionBegin; 2016 a = matA->A; 2017 b = matA->B; 2018 c = matB->A; 2019 d = matB->B; 2020 2021 PetscCall(MatEqual(a, c, &flg)); 2022 if (flg) PetscCall(MatEqual(b, d, &flg)); 2023 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2024 PetscFunctionReturn(PETSC_SUCCESS); 2025 } 2026 2027 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2028 { 2029 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2030 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2031 2032 PetscFunctionBegin; 2033 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
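The generic MatCopy_Basic() is also needed when the caller does not guarantee SAME_NONZERO_PATTERN.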
*/ 2034 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2035 /* because of the column compression in the off-processor part of the matrix a->B, 2036 the number of columns in a->B and b->B may be different, hence we cannot call 2037 the MatCopy() directly on the two parts. If need be, we can provide a more 2038 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2039 then copying the submatrices */ 2040 PetscCall(MatCopy_Basic(A, B, str)); 2041 } else { 2042 PetscCall(MatCopy(a->A, b->A, str)); 2043 PetscCall(MatCopy(a->B, b->B, str)); 2044 } 2045 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2046 PetscFunctionReturn(PETSC_SUCCESS); 2047 } 2048 2049 /* 2050 Computes the number of nonzeros per row needed for preallocation when X and Y 2051 have different nonzero structure. 2052 */ 2053 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2054 { 2055 PetscInt i, j, k, nzx, nzy; 2056 2057 PetscFunctionBegin; 2058 /* Set the number of nonzeros in the new matrix */ 2059 for (i = 0; i < m; i++) { 2060 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2061 nzx = xi[i + 1] - xi[i]; 2062 nzy = yi[i + 1] - yi[i]; 2063 nnz[i] = 0; 2064 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2065 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2066 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2067 nnz[i]++; 2068 } 2069 for (; k < nzy; k++) nnz[i]++; 2070 } 2071 PetscFunctionReturn(PETSC_SUCCESS); 2072 } 2073 2074 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2075 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2076 { 2077 PetscInt m = Y->rmap->N; 2078 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2079 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2080 2081 PetscFunctionBegin; 2082 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2087 { 2088 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2089 2090 PetscFunctionBegin; 2091 if (str == SAME_NONZERO_PATTERN) { 2092 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2093 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2094 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2095 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2096 } else { 2097 Mat B; 2098 PetscInt *nnz_d, *nnz_o; 2099 2100 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2101 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2102 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2103 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2104 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2105 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2106 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2107 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2108 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2109 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2110 PetscCall(MatHeaderMerge(Y, &B)); 2111 PetscCall(PetscFree(nnz_d)); 
2112 PetscCall(PetscFree(nnz_o)); 2113 } 2114 PetscFunctionReturn(PETSC_SUCCESS); 2115 } 2116 2117 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2118 2119 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2120 { 2121 PetscFunctionBegin; 2122 if (PetscDefined(USE_COMPLEX)) { 2123 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2124 2125 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2126 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2127 } 2128 PetscFunctionReturn(PETSC_SUCCESS); 2129 } 2130 2131 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2132 { 2133 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2134 2135 PetscFunctionBegin; 2136 PetscCall(MatRealPart(a->A)); 2137 PetscCall(MatRealPart(a->B)); 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2144 2145 PetscFunctionBegin; 2146 PetscCall(MatImaginaryPart(a->A)); 2147 PetscCall(MatImaginaryPart(a->B)); 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2152 { 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2154 PetscInt i, *idxb = NULL, m = A->rmap->n; 2155 PetscScalar *va, *vv; 2156 Vec vB, vA; 2157 const PetscScalar *vb; 2158 2159 PetscFunctionBegin; 2160 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2161 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2162 2163 PetscCall(VecGetArrayWrite(vA, &va)); 2164 if (idx) { 2165 for (i = 0; i < m; i++) { 2166 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2167 } 2168 } 2169 2170 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2171 PetscCall(PetscMalloc1(m, &idxb)); 2172 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2173 2174 PetscCall(VecGetArrayWrite(v, &vv)); 2175 PetscCall(VecGetArrayRead(vB, &vb)); 2176 for (i = 0; i < m; i++) { 2177 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2178 vv[i] = vb[i]; 2179 if (idx) idx[i] = a->garray[idxb[i]]; 2180 } else { 2181 vv[i] = va[i]; 2182 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2183 } 2184 } 2185 PetscCall(VecRestoreArrayWrite(v, &vv)); 2186 PetscCall(VecRestoreArrayWrite(vA, &va)); 2187 PetscCall(VecRestoreArrayRead(vB, &vb)); 2188 PetscCall(PetscFree(idxb)); 2189 PetscCall(VecDestroy(&vA)); 2190 PetscCall(VecDestroy(&vB)); 2191 PetscFunctionReturn(PETSC_SUCCESS); 2192 } 2193 2194 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2195 { 2196 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2197 Vec vB, vA; 2198 2199 PetscFunctionBegin; 2200 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2201 PetscCall(MatGetRowSumAbs(a->A, vA)); 2202 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2203 PetscCall(MatGetRowSumAbs(a->B, vB)); 2204 PetscCall(VecAXPY(vA, 1.0, vB)); 2205 PetscCall(VecDestroy(&vB)); 2206 PetscCall(VecCopy(vA, v)); 2207 PetscCall(VecDestroy(&vA)); 2208 PetscFunctionReturn(PETSC_SUCCESS); 2209 } 2210 2211 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2212 { 2213 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2214 PetscInt m = A->rmap->n, n = A->cmap->n; 2215 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2216 PetscInt *cmap = mat->garray; 2217 PetscInt *diagIdx, *offdiagIdx; 2218 Vec diagV, offdiagV; 2219 PetscScalar *a, *diagA, *offdiagA; 2220 const PetscScalar *ba, *bav; 2221 PetscInt r, j, col, ncols, *bi, *bj; 2222 Mat B = mat->B; 2223 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2224 2225 PetscFunctionBegin; 2226 /* When a process holds entire A and other 
processes have no entry */ 2227 if (A->cmap->N == n) { 2228 PetscCall(VecGetArrayWrite(v, &diagA)); 2229 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2230 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2231 PetscCall(VecDestroy(&diagV)); 2232 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2233 PetscFunctionReturn(PETSC_SUCCESS); 2234 } else if (n == 0) { 2235 if (m) { 2236 PetscCall(VecGetArrayWrite(v, &a)); 2237 for (r = 0; r < m; r++) { 2238 a[r] = 0.0; 2239 if (idx) idx[r] = -1; 2240 } 2241 PetscCall(VecRestoreArrayWrite(v, &a)); 2242 } 2243 PetscFunctionReturn(PETSC_SUCCESS); 2244 } 2245 2246 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2250 2251 /* Get offdiagIdx[] for implicit 0.0 */ 2252 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2253 ba = bav; 2254 bi = b->i; 2255 bj = b->j; 2256 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2257 for (r = 0; r < m; r++) { 2258 ncols = bi[r + 1] - bi[r]; 2259 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2260 offdiagA[r] = *ba; 2261 offdiagIdx[r] = cmap[0]; 2262 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2263 offdiagA[r] = 0.0; 2264 2265 /* Find first hole in the cmap */ 2266 for (j = 0; j < ncols; j++) { 2267 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2268 if (col > j && j < cstart) { 2269 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2270 break; 2271 } else if (col > j + n && j >= cstart) { 2272 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2273 break; 2274 } 2275 } 2276 if (j == ncols && ncols < A->cmap->N - n) { 2277 /* a hole is outside compressed Bcols */ 2278 if (ncols == 0) { 2279 if (cstart) { 2280 offdiagIdx[r] = 0; 2281 } else offdiagIdx[r] = cend; 2282 } else { /* ncols > 0 */ 2283 offdiagIdx[r] = cmap[ncols - 1] + 1; 2284 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2285 } 2286 } 2287 } 2288 2289 for (j = 0; j < ncols; j++) { 2290 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2291 offdiagA[r] = *ba; 2292 offdiagIdx[r] = cmap[*bj]; 2293 } 2294 ba++; 2295 bj++; 2296 } 2297 } 2298 2299 PetscCall(VecGetArrayWrite(v, &a)); 2300 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2301 for (r = 0; r < m; ++r) { 2302 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2303 a[r] = diagA[r]; 2304 if (idx) idx[r] = cstart + diagIdx[r]; 2305 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2306 a[r] = diagA[r]; 2307 if (idx) { 2308 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2309 idx[r] = cstart + diagIdx[r]; 2310 } else idx[r] = offdiagIdx[r]; 2311 } 2312 } else { 2313 a[r] = offdiagA[r]; 2314 if (idx) idx[r] = offdiagIdx[r]; 2315 } 2316 } 2317 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2318 PetscCall(VecRestoreArrayWrite(v, &a)); 2319 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2320 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2321 PetscCall(VecDestroy(&diagV)); 2322 PetscCall(VecDestroy(&offdiagV)); 2323 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2324 PetscFunctionReturn(PETSC_SUCCESS); 2325 } 2326 2327 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2328 { 2329 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2330 PetscInt m = A->rmap->n, n = A->cmap->n; 2331 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 
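/* garray translates the compressed local column indices of the off-diagonal block B into global column numbers */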
2332 PetscInt *cmap = mat->garray; 2333 PetscInt *diagIdx, *offdiagIdx; 2334 Vec diagV, offdiagV; 2335 PetscScalar *a, *diagA, *offdiagA; 2336 const PetscScalar *ba, *bav; 2337 PetscInt r, j, col, ncols, *bi, *bj; 2338 Mat B = mat->B; 2339 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2340 2341 PetscFunctionBegin; 2342 /* When a process holds entire A and other processes have no entry */ 2343 if (A->cmap->N == n) { 2344 PetscCall(VecGetArrayWrite(v, &diagA)); 2345 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2346 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2347 PetscCall(VecDestroy(&diagV)); 2348 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2349 PetscFunctionReturn(PETSC_SUCCESS); 2350 } else if (n == 0) { 2351 if (m) { 2352 PetscCall(VecGetArrayWrite(v, &a)); 2353 for (r = 0; r < m; r++) { 2354 a[r] = PETSC_MAX_REAL; 2355 if (idx) idx[r] = -1; 2356 } 2357 PetscCall(VecRestoreArrayWrite(v, &a)); 2358 } 2359 PetscFunctionReturn(PETSC_SUCCESS); 2360 } 2361 2362 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2363 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2364 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2365 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2366 2367 /* Get offdiagIdx[] for implicit 0.0 */ 2368 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2369 ba = bav; 2370 bi = b->i; 2371 bj = b->j; 2372 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2373 for (r = 0; r < m; r++) { 2374 ncols = bi[r + 1] - bi[r]; 2375 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2376 offdiagA[r] = *ba; 2377 offdiagIdx[r] = cmap[0]; 2378 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2379 offdiagA[r] = 0.0; 2380 2381 /* Find first hole in the cmap */ 2382 for (j = 0; j < ncols; j++) { 2383 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2384 if (col > j && j < cstart) { 2385 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2386 break; 2387 } else if (col > j + n && j >= cstart) { 2388 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2389 break; 2390 } 2391 } 2392 if (j == ncols && ncols < A->cmap->N - n) { 2393 /* a hole is outside compressed Bcols */ 2394 if (ncols == 0) { 2395 if (cstart) { 2396 offdiagIdx[r] = 0; 2397 } else offdiagIdx[r] = cend; 2398 } else { /* ncols > 0 */ 2399 offdiagIdx[r] = cmap[ncols - 1] + 1; 2400 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2401 } 2402 } 2403 } 2404 2405 for (j = 0; j < ncols; j++) { 2406 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2407 offdiagA[r] = *ba; 2408 offdiagIdx[r] = cmap[*bj]; 2409 } 2410 ba++; 2411 bj++; 2412 } 2413 } 2414 2415 PetscCall(VecGetArrayWrite(v, &a)); 2416 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2417 for (r = 0; r < m; ++r) { 2418 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2419 a[r] = diagA[r]; 2420 if (idx) idx[r] = cstart + diagIdx[r]; 2421 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2422 a[r] = diagA[r]; 2423 if (idx) { 2424 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2425 idx[r] = cstart + diagIdx[r]; 2426 } else idx[r] = offdiagIdx[r]; 2427 } 2428 } else { 2429 a[r] = offdiagA[r]; 2430 if (idx) idx[r] = offdiagIdx[r]; 2431 } 2432 } 2433 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2434 PetscCall(VecRestoreArrayWrite(v, &a)); 2435 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2436 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2437 PetscCall(VecDestroy(&diagV)); 2438 
PetscCall(VecDestroy(&offdiagV)); 2439 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2440 PetscFunctionReturn(PETSC_SUCCESS); 2441 } 2442 2443 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2444 { 2445 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2446 PetscInt m = A->rmap->n, n = A->cmap->n; 2447 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2448 PetscInt *cmap = mat->garray; 2449 PetscInt *diagIdx, *offdiagIdx; 2450 Vec diagV, offdiagV; 2451 PetscScalar *a, *diagA, *offdiagA; 2452 const PetscScalar *ba, *bav; 2453 PetscInt r, j, col, ncols, *bi, *bj; 2454 Mat B = mat->B; 2455 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2456 2457 PetscFunctionBegin; 2458 /* When a process holds entire A and other processes have no entry */ 2459 if (A->cmap->N == n) { 2460 PetscCall(VecGetArrayWrite(v, &diagA)); 2461 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2462 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2463 PetscCall(VecDestroy(&diagV)); 2464 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2465 PetscFunctionReturn(PETSC_SUCCESS); 2466 } else if (n == 0) { 2467 if (m) { 2468 PetscCall(VecGetArrayWrite(v, &a)); 2469 for (r = 0; r < m; r++) { 2470 a[r] = PETSC_MIN_REAL; 2471 if (idx) idx[r] = -1; 2472 } 2473 PetscCall(VecRestoreArrayWrite(v, &a)); 2474 } 2475 PetscFunctionReturn(PETSC_SUCCESS); 2476 } 2477 2478 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2479 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2480 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2481 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2482 2483 /* Get offdiagIdx[] for implicit 0.0 */ 2484 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2485 ba = bav; 2486 bi = b->i; 2487 bj = b->j; 2488 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2489 for (r = 0; r < m; r++) { 2490 ncols = bi[r + 1] - bi[r]; 2491 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2492 offdiagA[r] = *ba; 2493 offdiagIdx[r] = cmap[0]; 2494 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2495 offdiagA[r] = 0.0; 2496 2497 /* Find first hole in the cmap */ 2498 for (j = 0; j < ncols; j++) { 2499 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2500 if (col > j && j < cstart) { 2501 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2502 break; 2503 } else if (col > j + n && j >= cstart) { 2504 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2505 break; 2506 } 2507 } 2508 if (j == ncols && ncols < A->cmap->N - n) { 2509 /* a hole is outside compressed Bcols */ 2510 if (ncols == 0) { 2511 if (cstart) { 2512 offdiagIdx[r] = 0; 2513 } else offdiagIdx[r] = cend; 2514 } else { /* ncols > 0 */ 2515 offdiagIdx[r] = cmap[ncols - 1] + 1; 2516 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2517 } 2518 } 2519 } 2520 2521 for (j = 0; j < ncols; j++) { 2522 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2523 offdiagA[r] = *ba; 2524 offdiagIdx[r] = cmap[*bj]; 2525 } 2526 ba++; 2527 bj++; 2528 } 2529 } 2530 2531 PetscCall(VecGetArrayWrite(v, &a)); 2532 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2533 for (r = 0; r < m; ++r) { 2534 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2535 a[r] = diagA[r]; 2536 if (idx) idx[r] = cstart + diagIdx[r]; 2537 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2538 a[r] = diagA[r]; 2539 if (idx) { 2540 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2541 idx[r] = cstart + diagIdx[r]; 2542 } else idx[r] = offdiagIdx[r]; 2543 } 2544 } 
else { 2545 a[r] = offdiagA[r]; 2546 if (idx) idx[r] = offdiagIdx[r]; 2547 } 2548 } 2549 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2550 PetscCall(VecRestoreArrayWrite(v, &a)); 2551 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2552 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2553 PetscCall(VecDestroy(&diagV)); 2554 PetscCall(VecDestroy(&offdiagV)); 2555 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2556 PetscFunctionReturn(PETSC_SUCCESS); 2557 } 2558 2559 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2560 { 2561 Mat *dummy; 2562 2563 PetscFunctionBegin; 2564 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2565 *newmat = *dummy; 2566 PetscCall(PetscFree(dummy)); 2567 PetscFunctionReturn(PETSC_SUCCESS); 2568 } 2569 2570 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2571 { 2572 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2573 2574 PetscFunctionBegin; 2575 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2576 A->factorerrortype = a->A->factorerrortype; 2577 PetscFunctionReturn(PETSC_SUCCESS); 2578 } 2579 2580 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2581 { 2582 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2583 2584 PetscFunctionBegin; 2585 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2586 PetscCall(MatSetRandom(aij->A, rctx)); 2587 if (x->assembled) { 2588 PetscCall(MatSetRandom(aij->B, rctx)); 2589 } else { 2590 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2591 } 2592 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2593 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2594 PetscFunctionReturn(PETSC_SUCCESS); 2595 } 2596 2597 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2598 { 2599 PetscFunctionBegin; 2600 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2601 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2602 PetscFunctionReturn(PETSC_SUCCESS); 2603 } 2604 2605 /*@ 2606 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2607 2608 Not Collective 2609 2610 Input Parameter: 2611 . A - the matrix 2612 2613 Output Parameter: 2614 . 
nz - the number of nonzeros 2615 2616 Level: advanced 2617 2618 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2619 @*/ 2620 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2621 { 2622 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2623 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2624 PetscBool isaij; 2625 2626 PetscFunctionBegin; 2627 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2628 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2629 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2630 PetscFunctionReturn(PETSC_SUCCESS); 2631 } 2632 2633 /*@ 2634 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2635 2636 Collective 2637 2638 Input Parameters: 2639 + A - the matrix 2640 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2641 2642 Level: advanced 2643 2644 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2645 @*/ 2646 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2647 { 2648 PetscFunctionBegin; 2649 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2650 PetscFunctionReturn(PETSC_SUCCESS); 2651 } 2652 2653 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2654 { 2655 PetscBool sc = PETSC_FALSE, flg; 2656 2657 PetscFunctionBegin; 2658 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2659 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2660 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2661 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2662 PetscOptionsHeadEnd(); 2663 PetscFunctionReturn(PETSC_SUCCESS); 2664 } 2665 2666 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2667 { 2668 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2669 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2670 2671 PetscFunctionBegin; 2672 if (!Y->preallocated) { 2673 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2674 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
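MatShift_Basic() below inserts the shift with MatSetValues(), which can allocate any additional entries that turn out to be needed.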
*/ 2675 PetscInt nonew = aij->nonew; 2676 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2677 aij->nonew = nonew; 2678 } 2679 PetscCall(MatShift_Basic(Y, a)); 2680 PetscFunctionReturn(PETSC_SUCCESS); 2681 } 2682 2683 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2684 { 2685 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2686 2687 PetscFunctionBegin; 2688 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2689 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2690 if (d) { 2691 PetscInt rstart; 2692 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2693 *d += rstart; 2694 } 2695 PetscFunctionReturn(PETSC_SUCCESS); 2696 } 2697 2698 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2699 { 2700 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2701 2702 PetscFunctionBegin; 2703 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2704 PetscFunctionReturn(PETSC_SUCCESS); 2705 } 2706 2707 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2708 { 2709 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2710 2711 PetscFunctionBegin; 2712 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2713 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2714 PetscFunctionReturn(PETSC_SUCCESS); 2715 } 2716 2717 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2718 MatGetRow_MPIAIJ, 2719 MatRestoreRow_MPIAIJ, 2720 MatMult_MPIAIJ, 2721 /* 4*/ MatMultAdd_MPIAIJ, 2722 MatMultTranspose_MPIAIJ, 2723 MatMultTransposeAdd_MPIAIJ, 2724 NULL, 2725 NULL, 2726 NULL, 2727 /*10*/ NULL, 2728 NULL, 2729 NULL, 2730 MatSOR_MPIAIJ, 2731 MatTranspose_MPIAIJ, 2732 /*15*/ MatGetInfo_MPIAIJ, 2733 MatEqual_MPIAIJ, 2734 MatGetDiagonal_MPIAIJ, 2735 MatDiagonalScale_MPIAIJ, 2736 MatNorm_MPIAIJ, 2737 /*20*/ MatAssemblyBegin_MPIAIJ, 2738 MatAssemblyEnd_MPIAIJ, 2739 MatSetOption_MPIAIJ, 2740 MatZeroEntries_MPIAIJ, 2741 /*24*/ MatZeroRows_MPIAIJ, 2742 NULL, 2743 NULL, 2744 NULL, 2745 NULL, 2746 /*29*/ MatSetUp_MPI_Hash, 2747 NULL, 2748 NULL, 2749 MatGetDiagonalBlock_MPIAIJ, 2750 NULL, 2751 /*34*/ MatDuplicate_MPIAIJ, 2752 NULL, 2753 NULL, 2754 NULL, 2755 NULL, 2756 /*39*/ MatAXPY_MPIAIJ, 2757 MatCreateSubMatrices_MPIAIJ, 2758 MatIncreaseOverlap_MPIAIJ, 2759 MatGetValues_MPIAIJ, 2760 MatCopy_MPIAIJ, 2761 /*44*/ MatGetRowMax_MPIAIJ, 2762 MatScale_MPIAIJ, 2763 MatShift_MPIAIJ, 2764 MatDiagonalSet_MPIAIJ, 2765 MatZeroRowsColumns_MPIAIJ, 2766 /*49*/ MatSetRandom_MPIAIJ, 2767 MatGetRowIJ_MPIAIJ, 2768 MatRestoreRowIJ_MPIAIJ, 2769 NULL, 2770 NULL, 2771 /*54*/ MatFDColoringCreate_MPIXAIJ, 2772 NULL, 2773 MatSetUnfactored_MPIAIJ, 2774 MatPermute_MPIAIJ, 2775 NULL, 2776 /*59*/ MatCreateSubMatrix_MPIAIJ, 2777 MatDestroy_MPIAIJ, 2778 MatView_MPIAIJ, 2779 NULL, 2780 NULL, 2781 /*64*/ NULL, 2782 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2783 NULL, 2784 NULL, 2785 NULL, 2786 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2787 MatGetRowMinAbs_MPIAIJ, 2788 NULL, 2789 NULL, 2790 NULL, 2791 NULL, 2792 /*75*/ MatFDColoringApply_AIJ, 2793 MatSetFromOptions_MPIAIJ, 2794 NULL, 2795 NULL, 2796 MatFindZeroDiagonals_MPIAIJ, 2797 /*80*/ NULL, 2798 NULL, 2799 NULL, 2800 /*83*/ MatLoad_MPIAIJ, 2801 NULL, 2802 NULL, 2803 NULL, 2804 NULL, 2805 NULL, 2806 /*89*/ NULL, 2807 NULL, 2808 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2809 NULL, 2810 NULL, 2811 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2812 NULL, 2813 NULL, 2814 
NULL, 2815 MatBindToCPU_MPIAIJ, 2816 /*99*/ MatProductSetFromOptions_MPIAIJ, 2817 NULL, 2818 NULL, 2819 MatConjugate_MPIAIJ, 2820 NULL, 2821 /*104*/ MatSetValuesRow_MPIAIJ, 2822 MatRealPart_MPIAIJ, 2823 MatImaginaryPart_MPIAIJ, 2824 NULL, 2825 NULL, 2826 /*109*/ NULL, 2827 NULL, 2828 MatGetRowMin_MPIAIJ, 2829 NULL, 2830 MatMissingDiagonal_MPIAIJ, 2831 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2832 NULL, 2833 MatGetGhosts_MPIAIJ, 2834 NULL, 2835 NULL, 2836 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2837 NULL, 2838 NULL, 2839 NULL, 2840 MatGetMultiProcBlock_MPIAIJ, 2841 /*124*/ MatFindNonzeroRows_MPIAIJ, 2842 MatGetColumnReductions_MPIAIJ, 2843 MatInvertBlockDiagonal_MPIAIJ, 2844 MatInvertVariableBlockDiagonal_MPIAIJ, 2845 MatCreateSubMatricesMPI_MPIAIJ, 2846 /*129*/ NULL, 2847 NULL, 2848 NULL, 2849 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2850 NULL, 2851 /*134*/ NULL, 2852 NULL, 2853 NULL, 2854 NULL, 2855 NULL, 2856 /*139*/ MatSetBlockSizes_MPIAIJ, 2857 NULL, 2858 NULL, 2859 MatFDColoringSetUp_MPIXAIJ, 2860 MatFindOffBlockDiagonalEntries_MPIAIJ, 2861 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2862 /*145*/ NULL, 2863 NULL, 2864 NULL, 2865 MatCreateGraph_Simple_AIJ, 2866 NULL, 2867 /*150*/ NULL, 2868 MatEliminateZeros_MPIAIJ, 2869 MatGetRowSumAbs_MPIAIJ, 2870 NULL, 2871 NULL, 2872 NULL}; 2873 2874 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2875 { 2876 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2877 2878 PetscFunctionBegin; 2879 PetscCall(MatStoreValues(aij->A)); 2880 PetscCall(MatStoreValues(aij->B)); 2881 PetscFunctionReturn(PETSC_SUCCESS); 2882 } 2883 2884 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2885 { 2886 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2887 2888 PetscFunctionBegin; 2889 PetscCall(MatRetrieveValues(aij->A)); 2890 PetscCall(MatRetrieveValues(aij->B)); 2891 PetscFunctionReturn(PETSC_SUCCESS); 2892 } 2893 2894 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2895 { 2896 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2897 PetscMPIInt size; 2898 2899 PetscFunctionBegin; 2900 if (B->hash_active) { 2901 B->ops[0] = b->cops; 2902 B->hash_active = PETSC_FALSE; 2903 } 2904 PetscCall(PetscLayoutSetUp(B->rmap)); 2905 PetscCall(PetscLayoutSetUp(B->cmap)); 2906 2907 #if defined(PETSC_USE_CTABLE) 2908 PetscCall(PetscHMapIDestroy(&b->colmap)); 2909 #else 2910 PetscCall(PetscFree(b->colmap)); 2911 #endif 2912 PetscCall(PetscFree(b->garray)); 2913 PetscCall(VecDestroy(&b->lvec)); 2914 PetscCall(VecScatterDestroy(&b->Mvctx)); 2915 2916 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2917 2918 MatSeqXAIJGetOptions_Private(b->B); 2919 PetscCall(MatDestroy(&b->B)); 2920 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2921 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2922 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2923 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2924 MatSeqXAIJRestoreOptions_Private(b->B); 2925 2926 MatSeqXAIJGetOptions_Private(b->A); 2927 PetscCall(MatDestroy(&b->A)); 2928 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2929 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2930 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2931 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2932 MatSeqXAIJRestoreOptions_Private(b->A); 2933 2934 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2935 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2936 B->preallocated = PETSC_TRUE; 2937 B->was_assembled = PETSC_FALSE; 2938 B->assembled = PETSC_FALSE; 2939 PetscFunctionReturn(PETSC_SUCCESS); 2940 } 2941 2942 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2943 { 2944 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2945 2946 PetscFunctionBegin; 2947 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2948 PetscCall(PetscLayoutSetUp(B->rmap)); 2949 PetscCall(PetscLayoutSetUp(B->cmap)); 2950 if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2951 else { 2952 #if defined(PETSC_USE_CTABLE) 2953 PetscCall(PetscHMapIDestroy(&b->colmap)); 2954 #else 2955 PetscCall(PetscFree(b->colmap)); 2956 #endif 2957 PetscCall(PetscFree(b->garray)); 2958 PetscCall(VecDestroy(&b->lvec)); 2959 } 2960 PetscCall(VecScatterDestroy(&b->Mvctx)); 2961 2962 PetscCall(MatResetPreallocation(b->A)); 2963 PetscCall(MatResetPreallocation(b->B)); 2964 B->preallocated = PETSC_TRUE; 2965 B->was_assembled = PETSC_FALSE; 2966 B->assembled = PETSC_FALSE; 2967 PetscFunctionReturn(PETSC_SUCCESS); 2968 } 2969 2970 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2971 { 2972 Mat mat; 2973 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2974 2975 PetscFunctionBegin; 2976 *newmat = NULL; 2977 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2978 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2979 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2980 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2981 a = (Mat_MPIAIJ *)mat->data; 2982 2983 mat->factortype = matin->factortype; 2984 mat->assembled = matin->assembled; 2985 mat->insertmode = NOT_SET_VALUES; 2986 2987 a->size = oldmat->size; 2988 a->rank = oldmat->rank; 2989 a->donotstash = oldmat->donotstash; 2990 a->roworiented = oldmat->roworiented; 2991 a->rowindices = NULL; 2992 a->rowvalues = NULL; 2993 a->getrowactive = PETSC_FALSE; 2994 2995 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2996 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2997 if (matin->hash_active) { 2998 PetscCall(MatSetUp(mat)); 2999 } else { 3000 mat->preallocated = matin->preallocated; 3001 if (oldmat->colmap) { 3002 #if defined(PETSC_USE_CTABLE) 3003 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3004 #else 3005 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3006 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3007 #endif 3008 } else a->colmap = NULL; 3009 if (oldmat->garray) { 3010 PetscInt len; 3011 len = oldmat->B->cmap->n; 3012 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3013 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3014 } else a->garray = NULL; 3015 3016 /* It may happen MatDuplicate is called with a non-assembled matrix 3017 In fact, MatDuplicate only requires the matrix to 
be preallocated 3018 This may happen inside a DMCreateMatrix_Shell */ 3019 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3020 if (oldmat->Mvctx) { 3021 a->Mvctx = oldmat->Mvctx; 3022 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3023 } 3024 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3025 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3026 } 3027 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3028 *newmat = mat; 3029 PetscFunctionReturn(PETSC_SUCCESS); 3030 } 3031 3032 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3033 { 3034 PetscBool isbinary, ishdf5; 3035 3036 PetscFunctionBegin; 3037 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3038 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3039 /* force binary viewer to load .info file if it has not yet done so */ 3040 PetscCall(PetscViewerSetUp(viewer)); 3041 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3042 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3043 if (isbinary) { 3044 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3045 } else if (ishdf5) { 3046 #if defined(PETSC_HAVE_HDF5) 3047 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3048 #else 3049 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3050 #endif 3051 } else { 3052 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3053 } 3054 PetscFunctionReturn(PETSC_SUCCESS); 3055 } 3056 3057 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3058 { 3059 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3060 PetscInt *rowidxs, *colidxs; 3061 PetscScalar *matvals; 3062 3063 PetscFunctionBegin; 3064 PetscCall(PetscViewerSetUp(viewer)); 3065 3066 /* read in matrix header */ 3067 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3068 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3069 M = header[1]; 3070 N = header[2]; 3071 nz = header[3]; 3072 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3073 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3074 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3075 3076 /* set block sizes from the viewer's .info file */ 3077 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3078 /* set global sizes if not set already */ 3079 if (mat->rmap->N < 0) mat->rmap->N = M; 3080 if (mat->cmap->N < 0) mat->cmap->N = N; 3081 PetscCall(PetscLayoutSetUp(mat->rmap)); 3082 PetscCall(PetscLayoutSetUp(mat->cmap)); 3083 3084 /* check if the matrix sizes are correct */ 3085 PetscCall(MatGetSize(mat, &rows, &cols)); 3086 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3087 3088 /* read in row lengths and build row indices */ 3089 PetscCall(MatGetLocalSize(mat, 
&m, NULL)); 3090 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3091 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3092 rowidxs[0] = 0; 3093 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3094 if (nz != PETSC_INT_MAX) { 3095 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3096 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3097 } 3098 3099 /* read in column indices and matrix values */ 3100 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3101 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3102 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3103 /* store matrix indices and values */ 3104 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3105 PetscCall(PetscFree(rowidxs)); 3106 PetscCall(PetscFree2(colidxs, matvals)); 3107 PetscFunctionReturn(PETSC_SUCCESS); 3108 } 3109 3110 /* Not scalable because of ISAllGather() unless getting all columns. */ 3111 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3112 { 3113 IS iscol_local; 3114 PetscBool isstride; 3115 PetscMPIInt lisstride = 0, gisstride; 3116 3117 PetscFunctionBegin; 3118 /* check if we are grabbing all columns*/ 3119 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3120 3121 if (isstride) { 3122 PetscInt start, len, mstart, mlen; 3123 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3124 PetscCall(ISGetLocalSize(iscol, &len)); 3125 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3126 if (mstart == start && mlen - mstart == len) lisstride = 1; 3127 } 3128 3129 PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3130 if (gisstride) { 3131 PetscInt N; 3132 PetscCall(MatGetSize(mat, NULL, &N)); 3133 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3134 PetscCall(ISSetIdentity(iscol_local)); 3135 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3136 } else { 3137 PetscInt cbs; 3138 PetscCall(ISGetBlockSize(iscol, &cbs)); 3139 PetscCall(ISAllGather(iscol, &iscol_local)); 3140 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3141 } 3142 3143 *isseq = iscol_local; 3144 PetscFunctionReturn(PETSC_SUCCESS); 3145 } 3146 3147 /* 3148 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3149 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3150 3151 Input Parameters: 3152 + mat - matrix 3153 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3154 i.e., mat->rstart <= isrow[i] < mat->rend 3155 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3156 i.e., mat->cstart <= iscol[i] < mat->cend 3157 3158 Output Parameters: 3159 + isrow_d - sequential row index set for retrieving mat->A 3160 . iscol_d - sequential column index set for retrieving mat->A 3161 . 
iscol_o - sequential column index set for retrieving mat->B 3162 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3163 */ 3164 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3165 { 3166 Vec x, cmap; 3167 const PetscInt *is_idx; 3168 PetscScalar *xarray, *cmaparray; 3169 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3170 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3171 Mat B = a->B; 3172 Vec lvec = a->lvec, lcmap; 3173 PetscInt i, cstart, cend, Bn = B->cmap->N; 3174 MPI_Comm comm; 3175 VecScatter Mvctx = a->Mvctx; 3176 3177 PetscFunctionBegin; 3178 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3179 PetscCall(ISGetLocalSize(iscol, &ncols)); 3180 3181 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3182 PetscCall(MatCreateVecs(mat, &x, NULL)); 3183 PetscCall(VecSet(x, -1.0)); 3184 PetscCall(VecDuplicate(x, &cmap)); 3185 PetscCall(VecSet(cmap, -1.0)); 3186 3187 /* Get start indices */ 3188 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3189 isstart -= ncols; 3190 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3191 3192 PetscCall(ISGetIndices(iscol, &is_idx)); 3193 PetscCall(VecGetArray(x, &xarray)); 3194 PetscCall(VecGetArray(cmap, &cmaparray)); 3195 PetscCall(PetscMalloc1(ncols, &idx)); 3196 for (i = 0; i < ncols; i++) { 3197 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3198 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3199 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3200 } 3201 PetscCall(VecRestoreArray(x, &xarray)); 3202 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3203 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3204 3205 /* Get iscol_d */ 3206 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3207 PetscCall(ISGetBlockSize(iscol, &i)); 3208 PetscCall(ISSetBlockSize(*iscol_d, i)); 3209 3210 /* Get isrow_d */ 3211 PetscCall(ISGetLocalSize(isrow, &m)); 3212 rstart = mat->rmap->rstart; 3213 PetscCall(PetscMalloc1(m, &idx)); 3214 PetscCall(ISGetIndices(isrow, &is_idx)); 3215 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3216 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3217 3218 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3219 PetscCall(ISGetBlockSize(isrow, &i)); 3220 PetscCall(ISSetBlockSize(*isrow_d, i)); 3221 3222 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3223 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3224 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3225 3226 PetscCall(VecDuplicate(lvec, &lcmap)); 3227 3228 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3229 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3230 3231 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3232 /* off-process column indices */ 3233 count = 0; 3234 PetscCall(PetscMalloc1(Bn, &idx)); 3235 PetscCall(PetscMalloc1(Bn, &cmap1)); 3236 3237 PetscCall(VecGetArray(lvec, &xarray)); 3238 PetscCall(VecGetArray(lcmap, &cmaparray)); 3239 for (i = 0; i < Bn; i++) { 3240 if (PetscRealPart(xarray[i]) > -1.0) { 3241 idx[count] = i; /* local column index in off-diagonal part B */ 3242 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3243 
count++; 3244 } 3245 } 3246 PetscCall(VecRestoreArray(lvec, &xarray)); 3247 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3248 3249 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3250 /* cannot ensure iscol_o has same blocksize as iscol! */ 3251 3252 PetscCall(PetscFree(idx)); 3253 *garray = cmap1; 3254 3255 PetscCall(VecDestroy(&x)); 3256 PetscCall(VecDestroy(&cmap)); 3257 PetscCall(VecDestroy(&lcmap)); 3258 PetscFunctionReturn(PETSC_SUCCESS); 3259 } 3260 3261 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3262 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3263 { 3264 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3265 Mat M = NULL; 3266 MPI_Comm comm; 3267 IS iscol_d, isrow_d, iscol_o; 3268 Mat Asub = NULL, Bsub = NULL; 3269 PetscInt n; 3270 3271 PetscFunctionBegin; 3272 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3273 3274 if (call == MAT_REUSE_MATRIX) { 3275 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3277 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3278 3279 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3280 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3281 3282 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3283 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3284 3285 /* Update diagonal and off-diagonal portions of submat */ 3286 asub = (Mat_MPIAIJ *)(*submat)->data; 3287 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3288 PetscCall(ISGetLocalSize(iscol_o, &n)); 3289 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3290 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3291 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3292 3293 } else { /* call == MAT_INITIAL_MATRIX) */ 3294 const PetscInt *garray; 3295 PetscInt BsubN; 3296 3297 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3298 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3299 3300 /* Create local submatrices Asub and Bsub */ 3301 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3302 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3303 3304 /* Create submatrix M */ 3305 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3306 3307 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3308 asub = (Mat_MPIAIJ *)M->data; 3309 3310 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3311 n = asub->B->cmap->N; 3312 if (BsubN > n) { 3313 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3314 const PetscInt *idx; 3315 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3316 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3317 3318 PetscCall(PetscMalloc1(n, &idx_new)); 3319 j = 0; 3320 PetscCall(ISGetIndices(iscol_o, &idx)); 3321 for (i = 0; i < n; i++) { 3322 if (j >= BsubN) break; 3323 while (subgarray[i] > garray[j]) j++; 3324 3325 if (subgarray[i] == garray[j]) { 3326 idx_new[i] = idx[j++]; 3327 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3328 } 3329 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3330 3331 PetscCall(ISDestroy(&iscol_o)); 3332 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3333 3334 } else if (BsubN < n) { 3335 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3336 } 3337 3338 PetscCall(PetscFree(garray)); 3339 *submat = M; 3340 3341 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3342 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3343 PetscCall(ISDestroy(&isrow_d)); 3344 3345 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3346 PetscCall(ISDestroy(&iscol_d)); 3347 3348 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3349 PetscCall(ISDestroy(&iscol_o)); 3350 } 3351 PetscFunctionReturn(PETSC_SUCCESS); 3352 } 3353 3354 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3355 { 3356 IS iscol_local = NULL, isrow_d; 3357 PetscInt csize; 3358 PetscInt n, i, j, start, end; 3359 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3360 MPI_Comm comm; 3361 3362 PetscFunctionBegin; 3363 /* If isrow has same processor distribution as mat, 3364 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3365 if (call == MAT_REUSE_MATRIX) { 3366 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3367 if (isrow_d) { 3368 sameRowDist = PETSC_TRUE; 3369 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3370 } else { 3371 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3372 if (iscol_local) { 3373 sameRowDist = PETSC_TRUE; 3374 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3375 } 3376 } 3377 } else { 3378 /* Check if isrow has same processor distribution as mat */ 3379 sameDist[0] = PETSC_FALSE; 3380 
PetscCall(ISGetLocalSize(isrow, &n)); 3381 if (!n) { 3382 sameDist[0] = PETSC_TRUE; 3383 } else { 3384 PetscCall(ISGetMinMax(isrow, &i, &j)); 3385 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3386 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3387 } 3388 3389 /* Check if iscol has same processor distribution as mat */ 3390 sameDist[1] = PETSC_FALSE; 3391 PetscCall(ISGetLocalSize(iscol, &n)); 3392 if (!n) { 3393 sameDist[1] = PETSC_TRUE; 3394 } else { 3395 PetscCall(ISGetMinMax(iscol, &i, &j)); 3396 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3397 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3398 } 3399 3400 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3401 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3402 sameRowDist = tsameDist[0]; 3403 } 3404 3405 if (sameRowDist) { 3406 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3407 /* isrow and iscol have same processor distribution as mat */ 3408 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3409 PetscFunctionReturn(PETSC_SUCCESS); 3410 } else { /* sameRowDist */ 3411 /* isrow has same processor distribution as mat */ 3412 if (call == MAT_INITIAL_MATRIX) { 3413 PetscBool sorted; 3414 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3415 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3416 PetscCall(ISGetSize(iscol, &i)); 3417 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3418 3419 PetscCall(ISSorted(iscol_local, &sorted)); 3420 if (sorted) { 3421 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3422 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3423 PetscFunctionReturn(PETSC_SUCCESS); 3424 } 3425 } else { /* call == MAT_REUSE_MATRIX */ 3426 IS iscol_sub; 3427 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3428 if (iscol_sub) { 3429 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3430 PetscFunctionReturn(PETSC_SUCCESS); 3431 } 3432 } 3433 } 3434 } 3435 3436 /* General case: iscol -> iscol_local which has global size of iscol */ 3437 if (call == MAT_REUSE_MATRIX) { 3438 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3439 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3440 } else { 3441 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3442 } 3443 3444 PetscCall(ISGetLocalSize(iscol, &csize)); 3445 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3446 3447 if (call == MAT_INITIAL_MATRIX) { 3448 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3449 PetscCall(ISDestroy(&iscol_local)); 3450 } 3451 PetscFunctionReturn(PETSC_SUCCESS); 3452 } 3453 3454 /*@C 3455 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3456 and "off-diagonal" part of the matrix in CSR format. 3457 3458 Collective 3459 3460 Input Parameters: 3461 + comm - MPI communicator 3462 . A - "diagonal" portion of matrix 3463 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3464 - garray - global index of `B` columns 3465 3466 Output Parameter: 3467 . mat - the matrix, with input `A` as its local diagonal matrix 3468 3469 Level: advanced 3470 3471 Notes: 3472 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3473 3474 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3475 3476 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3477 @*/ 3478 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3479 { 3480 Mat_MPIAIJ *maij; 3481 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3482 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3483 const PetscScalar *oa; 3484 Mat Bnew; 3485 PetscInt m, n, N; 3486 MatType mpi_mat_type; 3487 3488 PetscFunctionBegin; 3489 PetscCall(MatCreate(comm, mat)); 3490 PetscCall(MatGetSize(A, &m, &n)); 3491 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3492 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3493 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3494 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3495 3496 /* Get global columns of mat */ 3497 PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3498 3499 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3500 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3501 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3502 PetscCall(MatSetType(*mat, mpi_mat_type)); 3503 3504 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3505 maij = (Mat_MPIAIJ *)(*mat)->data; 3506 3507 (*mat)->preallocated = PETSC_TRUE; 3508 3509 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3510 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3511 3512 /* Set A as diagonal portion of *mat */ 3513 maij->A = A; 3514 3515 nz = oi[m]; 3516 for (i = 0; i < nz; i++) { 3517 col = oj[i]; 3518 oj[i] = garray[col]; 3519 } 3520 3521 /* Set Bnew as off-diagonal portion of *mat */ 3522 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3523 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3524 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3525 bnew = (Mat_SeqAIJ *)Bnew->data; 3526 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3527 maij->B = Bnew; 3528 3529 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3530 3531 b->free_a = PETSC_FALSE; 3532 b->free_ij = PETSC_FALSE; 3533 PetscCall(MatDestroy(&B)); 3534 3535 bnew->free_a = PETSC_TRUE; 3536 bnew->free_ij = PETSC_TRUE; 3537 3538 /* condense columns of maij->B */ 3539 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3540 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3541 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3542 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3543 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3544 PetscFunctionReturn(PETSC_SUCCESS); 3545 } 3546 3547 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3548 3549 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3550 { 3551 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3552 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3553 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3554 Mat M, Msub, B = a->B; 3555 MatScalar *aa; 3556 Mat_SeqAIJ *aij; 3557 PetscInt *garray = a->garray, *colsub, Ncols; 3558 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3559 IS iscol_sub, iscmap; 3560 const PetscInt *is_idx, *cmap; 3561 PetscBool allcolumns = PETSC_FALSE; 3562 MPI_Comm comm; 3563 3564 PetscFunctionBegin; 3565 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3566 if (call == MAT_REUSE_MATRIX) { 3567 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3568 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3569 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3570 3571 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3572 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3573 3574 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3575 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3576 3577 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3578 3579 } else { /* call == MAT_INITIAL_MATRIX) */ 3580 PetscBool 
flg; 3581 3582 PetscCall(ISGetLocalSize(iscol, &n)); 3583 PetscCall(ISGetSize(iscol, &Ncols)); 3584 3585 /* (1) iscol -> nonscalable iscol_local */ 3586 /* Check for special case: each processor gets entire matrix columns */ 3587 PetscCall(ISIdentity(iscol_local, &flg)); 3588 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3589 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3590 if (allcolumns) { 3591 iscol_sub = iscol_local; 3592 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3593 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3594 3595 } else { 3596 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3597 PetscInt *idx, *cmap1, k; 3598 PetscCall(PetscMalloc1(Ncols, &idx)); 3599 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3600 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3601 count = 0; 3602 k = 0; 3603 for (i = 0; i < Ncols; i++) { 3604 j = is_idx[i]; 3605 if (j >= cstart && j < cend) { 3606 /* diagonal part of mat */ 3607 idx[count] = j; 3608 cmap1[count++] = i; /* column index in submat */ 3609 } else if (Bn) { 3610 /* off-diagonal part of mat */ 3611 if (j == garray[k]) { 3612 idx[count] = j; 3613 cmap1[count++] = i; /* column index in submat */ 3614 } else if (j > garray[k]) { 3615 while (j > garray[k] && k < Bn - 1) k++; 3616 if (j == garray[k]) { 3617 idx[count] = j; 3618 cmap1[count++] = i; /* column index in submat */ 3619 } 3620 } 3621 } 3622 } 3623 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3624 3625 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3626 PetscCall(ISGetBlockSize(iscol, &cbs)); 3627 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3628 3629 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3630 } 3631 3632 /* (3) Create sequential Msub */ 3633 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3634 } 3635 3636 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3637 aij = (Mat_SeqAIJ *)Msub->data; 3638 ii = aij->i; 3639 PetscCall(ISGetIndices(iscmap, &cmap)); 3640 3641 /* 3642 m - number of local rows 3643 Ncols - number of columns (same on all processors) 3644 rstart - first row in new global matrix generated 3645 */ 3646 PetscCall(MatGetSize(Msub, &m, NULL)); 3647 3648 if (call == MAT_INITIAL_MATRIX) { 3649 /* (4) Create parallel newmat */ 3650 PetscMPIInt rank, size; 3651 PetscInt csize; 3652 3653 PetscCallMPI(MPI_Comm_size(comm, &size)); 3654 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3655 3656 /* 3657 Determine the number of non-zeros in the diagonal and off-diagonal 3658 portions of the matrix in order to do correct preallocation 3659 */ 3660 3661 /* first get start and end of "diagonal" columns */ 3662 PetscCall(ISGetLocalSize(iscol, &csize)); 3663 if (csize == PETSC_DECIDE) { 3664 PetscCall(ISGetSize(isrow, &mglobal)); 3665 if (mglobal == Ncols) { /* square matrix */ 3666 nlocal = m; 3667 } else { 3668 nlocal = Ncols / size + ((Ncols % size) > rank); 3669 } 3670 } else { 3671 nlocal = csize; 3672 } 3673 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3674 rstart = rend - nlocal; 3675 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3676 3677 /* next, 
compute all the lengths */ 3678 jj = aij->j; 3679 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3680 olens = dlens + m; 3681 for (i = 0; i < m; i++) { 3682 jend = ii[i + 1] - ii[i]; 3683 olen = 0; 3684 dlen = 0; 3685 for (j = 0; j < jend; j++) { 3686 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3687 else dlen++; 3688 jj++; 3689 } 3690 olens[i] = olen; 3691 dlens[i] = dlen; 3692 } 3693 3694 PetscCall(ISGetBlockSize(isrow, &bs)); 3695 PetscCall(ISGetBlockSize(iscol, &cbs)); 3696 3697 PetscCall(MatCreate(comm, &M)); 3698 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3699 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3700 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3701 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3702 PetscCall(PetscFree(dlens)); 3703 3704 } else { /* call == MAT_REUSE_MATRIX */ 3705 M = *newmat; 3706 PetscCall(MatGetLocalSize(M, &i, NULL)); 3707 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3708 PetscCall(MatZeroEntries(M)); 3709 /* 3710 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3711 rather than the slower MatSetValues(). 3712 */ 3713 M->was_assembled = PETSC_TRUE; 3714 M->assembled = PETSC_FALSE; 3715 } 3716 3717 /* (5) Set values of Msub to *newmat */ 3718 PetscCall(PetscMalloc1(count, &colsub)); 3719 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3720 3721 jj = aij->j; 3722 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3723 for (i = 0; i < m; i++) { 3724 row = rstart + i; 3725 nz = ii[i + 1] - ii[i]; 3726 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3727 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3728 jj += nz; 3729 aa += nz; 3730 } 3731 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3732 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3733 3734 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3735 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3736 3737 PetscCall(PetscFree(colsub)); 3738 3739 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3740 if (call == MAT_INITIAL_MATRIX) { 3741 *newmat = M; 3742 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3743 PetscCall(MatDestroy(&Msub)); 3744 3745 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3746 PetscCall(ISDestroy(&iscol_sub)); 3747 3748 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3749 PetscCall(ISDestroy(&iscmap)); 3750 3751 if (iscol_local) { 3752 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3753 PetscCall(ISDestroy(&iscol_local)); 3754 } 3755 } 3756 PetscFunctionReturn(PETSC_SUCCESS); 3757 } 3758 3759 /* 3760 Not great since it makes two copies of the submatrix, first an SeqAIJ 3761 in local and then by concatenating the local matrices the end result. 3762 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3763 3764 This requires a sequential iscol with all indices. 
3765 */ 3766 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3767 { 3768 PetscMPIInt rank, size; 3769 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3770 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3771 Mat M, Mreuse; 3772 MatScalar *aa, *vwork; 3773 MPI_Comm comm; 3774 Mat_SeqAIJ *aij; 3775 PetscBool colflag, allcolumns = PETSC_FALSE; 3776 3777 PetscFunctionBegin; 3778 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3779 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3780 PetscCallMPI(MPI_Comm_size(comm, &size)); 3781 3782 /* Check for special case: each processor gets entire matrix columns */ 3783 PetscCall(ISIdentity(iscol, &colflag)); 3784 PetscCall(ISGetLocalSize(iscol, &n)); 3785 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3786 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3787 3788 if (call == MAT_REUSE_MATRIX) { 3789 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3790 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3791 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3792 } else { 3793 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3794 } 3795 3796 /* 3797 m - number of local rows 3798 n - number of columns (same on all processors) 3799 rstart - first row in new global matrix generated 3800 */ 3801 PetscCall(MatGetSize(Mreuse, &m, &n)); 3802 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3803 if (call == MAT_INITIAL_MATRIX) { 3804 aij = (Mat_SeqAIJ *)Mreuse->data; 3805 ii = aij->i; 3806 jj = aij->j; 3807 3808 /* 3809 Determine the number of non-zeros in the diagonal and off-diagonal 3810 portions of the matrix in order to do correct preallocation 3811 */ 3812 3813 /* first get start and end of "diagonal" columns */ 3814 if (csize == PETSC_DECIDE) { 3815 PetscCall(ISGetSize(isrow, &mglobal)); 3816 if (mglobal == n) { /* square matrix */ 3817 nlocal = m; 3818 } else { 3819 nlocal = n / size + ((n % size) > rank); 3820 } 3821 } else { 3822 nlocal = csize; 3823 } 3824 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3825 rstart = rend - nlocal; 3826 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3827 3828 /* next, compute all the lengths */ 3829 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3830 olens = dlens + m; 3831 for (i = 0; i < m; i++) { 3832 jend = ii[i + 1] - ii[i]; 3833 olen = 0; 3834 dlen = 0; 3835 for (j = 0; j < jend; j++) { 3836 if (*jj < rstart || *jj >= rend) olen++; 3837 else dlen++; 3838 jj++; 3839 } 3840 olens[i] = olen; 3841 dlens[i] = dlen; 3842 } 3843 PetscCall(MatCreate(comm, &M)); 3844 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3845 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3846 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3847 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3848 PetscCall(PetscFree(dlens)); 3849 } else { 3850 PetscInt ml, nl; 3851 3852 M = *newmat; 3853 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3854 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3855 PetscCall(MatZeroEntries(M)); 3856 /* 3857 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3858 rather than the slower MatSetValues(). 3859 */ 3860 M->was_assembled = PETSC_TRUE; 3861 M->assembled = PETSC_FALSE; 3862 } 3863 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3864 aij = (Mat_SeqAIJ *)Mreuse->data; 3865 ii = aij->i; 3866 jj = aij->j; 3867 3868 /* trigger copy to CPU if needed */ 3869 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3870 for (i = 0; i < m; i++) { 3871 row = rstart + i; 3872 nz = ii[i + 1] - ii[i]; 3873 cwork = jj; 3874 jj = PetscSafePointerPlusOffset(jj, nz); 3875 vwork = aa; 3876 aa = PetscSafePointerPlusOffset(aa, nz); 3877 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3878 } 3879 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3880 3881 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3882 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3883 *newmat = M; 3884 3885 /* save submatrix used in processor for next request */ 3886 if (call == MAT_INITIAL_MATRIX) { 3887 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3888 PetscCall(MatDestroy(&Mreuse)); 3889 } 3890 PetscFunctionReturn(PETSC_SUCCESS); 3891 } 3892 3893 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3894 { 3895 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3896 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3897 const PetscInt *JJ; 3898 PetscBool nooffprocentries; 3899 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3900 3901 PetscFunctionBegin; 3902 PetscCall(PetscLayoutSetUp(B->rmap)); 3903 PetscCall(PetscLayoutSetUp(B->cmap)); 3904 m = B->rmap->n; 3905 cstart = B->cmap->rstart; 3906 cend = B->cmap->rend; 3907 rstart = B->rmap->rstart; 3908 irstart = Ii[0]; 3909 3910 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3911 3912 if (PetscDefined(USE_DEBUG)) { 3913 for (i = 0; i < m; i++) { 3914 nnz = Ii[i + 1] - Ii[i]; 3915 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3916 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3917 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3918 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3919 } 3920 } 3921 3922 for (i = 0; i < m; i++) { 3923 nnz = Ii[i + 1] - Ii[i]; 3924 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3925 nnz_max = PetscMax(nnz_max, nnz); 3926 d = 0; 3927 for (j = 0; j < nnz; j++) { 3928 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3929 } 3930 d_nnz[i] = d; 3931 o_nnz[i] = nnz - d; 3932 } 3933 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3934 PetscCall(PetscFree2(d_nnz, o_nnz)); 3935 3936 for (i = 0; i < m; i++) { 3937 ii = i + rstart; 3938 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3939 } 3940 nooffprocentries = B->nooffprocentries; 3941 B->nooffprocentries = PETSC_TRUE; 3942 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3943 
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3944 B->nooffprocentries = nooffprocentries; 3945 3946 /* count number of entries below block diagonal */ 3947 PetscCall(PetscFree(Aij->ld)); 3948 PetscCall(PetscCalloc1(m, &ld)); 3949 Aij->ld = ld; 3950 for (i = 0; i < m; i++) { 3951 nnz = Ii[i + 1] - Ii[i]; 3952 j = 0; 3953 while (j < nnz && J[j] < cstart) j++; 3954 ld[i] = j; 3955 if (J) J += nnz; 3956 } 3957 3958 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3959 PetscFunctionReturn(PETSC_SUCCESS); 3960 } 3961 3962 /*@ 3963 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3964 (the default parallel PETSc format). 3965 3966 Collective 3967 3968 Input Parameters: 3969 + B - the matrix 3970 . i - the indices into `j` for the start of each local row (indices start with zero) 3971 . j - the column indices for each local row (indices start with zero) 3972 - v - optional values in the matrix 3973 3974 Level: developer 3975 3976 Notes: 3977 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3978 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3979 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3980 3981 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3982 3983 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3984 3985 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3986 3987 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3988 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3989 3990 The format which is used for the sparse matrix input, is equivalent to a 3991 row-major ordering.. i.e for the following matrix, the input data expected is 3992 as shown 3993 .vb 3994 1 0 0 3995 2 0 3 P0 3996 ------- 3997 4 5 6 P1 3998 3999 Process0 [P0] rows_owned=[0,1] 4000 i = {0,1,3} [size = nrow+1 = 2+1] 4001 j = {0,0,2} [size = 3] 4002 v = {1,2,3} [size = 3] 4003 4004 Process1 [P1] rows_owned=[2] 4005 i = {0,3} [size = nrow+1 = 1+1] 4006 j = {0,1,2} [size = 3] 4007 v = {4,5,6} [size = 3] 4008 .ve 4009 4010 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4011 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4012 @*/ 4013 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4014 { 4015 PetscFunctionBegin; 4016 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4017 PetscFunctionReturn(PETSC_SUCCESS); 4018 } 4019 4020 /*@ 4021 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4022 (the default parallel PETSc format). For good matrix assembly performance 4023 the user should preallocate the matrix storage by setting the parameters 4024 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4025 4026 Collective 4027 4028 Input Parameters: 4029 + B - the matrix 4030 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4031 (same value is used for all local rows) 4032 . d_nnz - array containing the number of nonzeros in the various rows of the 4033 DIAGONAL portion of the local submatrix (possibly different for each row) 4034 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4035 The size of this array is equal to the number of local rows, i.e., `m`. 4036 For matrices that will be factored, you must leave room for (and set) 4037 the diagonal entry even if it is zero. 4038 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4039 submatrix (same value is used for all local rows). 4040 - o_nnz - array containing the number of nonzeros in the various rows of the 4041 OFF-DIAGONAL portion of the local submatrix (possibly different for 4042 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4043 structure. The size of this array is equal to the number 4044 of local rows, i.e., `m`. 4045 4046 Example Usage: 4047 Consider the following 8x8 matrix with 34 non-zero values that is 4048 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4049 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4050 as follows 4051 4052 .vb 4053 1 2 0 | 0 3 0 | 0 4 4054 Proc0 0 5 6 | 7 0 0 | 8 0 4055 9 0 10 | 11 0 0 | 12 0 4056 ------------------------------------- 4057 13 0 14 | 15 16 17 | 0 0 4058 Proc1 0 18 0 | 19 20 21 | 0 0 4059 0 0 0 | 22 23 0 | 24 0 4060 ------------------------------------- 4061 Proc2 25 26 27 | 0 0 28 | 29 0 4062 30 0 0 | 31 32 33 | 0 34 4063 .ve 4064 4065 This can be represented as a collection of submatrices as 4066 .vb 4067 A B C 4068 D E F 4069 G H I 4070 .ve 4071 4072 Where the submatrices A,B,C are owned by proc0, D,E,F are 4073 owned by proc1, and G,H,I are owned by proc2. 4074 4075 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4076 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4077 The 'M','N' parameters are 8,8, and have the same values on all procs. 4078 4079 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4080 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4081 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4082 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4083 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4084 matrix, and [DF] as another `MATSEQAIJ` matrix. 4085 4086 When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4087 allocated for every row of the local diagonal submatrix, and `o_nz` 4088 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4089 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per local 4090 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4091 In this case, the values of `d_nz`, `o_nz` are 4092 .vb 4093 proc0 d_nz = 2, o_nz = 2 4094 proc1 d_nz = 3, o_nz = 2 4095 proc2 d_nz = 1, o_nz = 4 4096 .ve 4097 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4098 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4099 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4100 34 values. 4101 4102 When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4103 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 
4104 In the above case the values for `d_nnz`, `o_nnz` are 4105 .vb 4106 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4107 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4108 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4109 .ve 4110 Here the space allocated is the sum of all the above values, i.e., 34, and 4111 hence the preallocation is perfect. 4112 4113 Level: intermediate 4114 4115 Notes: 4116 If the *_nnz parameter is given then the *_nz parameter is ignored. 4117 4118 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4119 storage. The stored row and column indices begin with zero. 4120 See [Sparse Matrices](sec_matsparse) for details. 4121 4122 The parallel matrix is partitioned such that the first m0 rows belong to 4123 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4124 to process 2, etc., where m0,m1,m2,... are the input parameter `m`. 4125 4126 The DIAGONAL portion of the local submatrix of a processor can be defined 4127 as the submatrix which is obtained by extracting the part corresponding to 4128 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4129 first row that belongs to the processor, r2 is the last row belonging to 4130 this processor, and c1-c2 is the range of indices of the local part of a 4131 vector suitable for applying the matrix to. This is an mxn matrix. In the 4132 common case of a square matrix, the row and column ranges are the same and 4133 the DIAGONAL part is also square. The remaining portion of the local 4134 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4135 4136 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4137 4138 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4139 for example, see the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4140 You can also run with the option `-info` and look for messages with the string 4141 malloc in them to see if additional memory allocation was needed. 4142 4143 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4144 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4145 @*/ 4146 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4147 { 4148 PetscFunctionBegin; 4149 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4150 PetscValidType(B, 1); 4151 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4152 PetscFunctionReturn(PETSC_SUCCESS); 4153 } 4154 4155 /*@ 4156 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4157 CSR format. 4158 4159 Collective 4160 4161 Input Parameters: 4162 + comm - MPI communicator 4163 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4164 . n - This value should be the same as the local size used in creating the 4165 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 4166 calculated if `N` is given) For square matrices n is almost always `m`. 4167 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4168 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4169 . 
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4170 . j - global column indices 4171 - a - optional matrix values 4172 4173 Output Parameter: 4174 . mat - the matrix 4175 4176 Level: intermediate 4177 4178 Notes: 4179 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4180 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4181 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4182 4183 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4184 4185 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4186 4187 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4188 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4189 4190 The format which is used for the sparse matrix input, is equivalent to a 4191 row-major ordering, i.e., for the following matrix, the input data expected is 4192 as shown 4193 .vb 4194 1 0 0 4195 2 0 3 P0 4196 ------- 4197 4 5 6 P1 4198 4199 Process0 [P0] rows_owned=[0,1] 4200 i = {0,1,3} [size = nrow+1 = 2+1] 4201 j = {0,0,2} [size = 3] 4202 v = {1,2,3} [size = 3] 4203 4204 Process1 [P1] rows_owned=[2] 4205 i = {0,3} [size = nrow+1 = 1+1] 4206 j = {0,1,2} [size = 3] 4207 v = {4,5,6} [size = 3] 4208 .ve 4209 4210 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4211 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4212 @*/ 4213 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4214 { 4215 PetscFunctionBegin; 4216 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4217 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4218 PetscCall(MatCreate(comm, mat)); 4219 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4220 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4221 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4222 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4223 PetscFunctionReturn(PETSC_SUCCESS); 4224 } 4225 4226 /*@ 4227 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4228 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4229 from `MatCreateMPIAIJWithArrays()` 4230 4231 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4232 4233 Collective 4234 4235 Input Parameters: 4236 + mat - the matrix 4237 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4238 . n - This value should be the same as the local size used in creating the 4239 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4240 calculated if N is given) For square matrices n is almost always m. 4241 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4242 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4243 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4244 . J - column indices 4245 - v - matrix values 4246 4247 Level: deprecated 4248 4249 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4250 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4251 @*/ 4252 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4253 { 4254 PetscInt nnz, i; 4255 PetscBool nooffprocentries; 4256 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4257 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4258 PetscScalar *ad, *ao; 4259 PetscInt ldi, Iii, md; 4260 const PetscInt *Adi = Ad->i; 4261 PetscInt *ld = Aij->ld; 4262 4263 PetscFunctionBegin; 4264 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4265 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4266 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4267 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4268 4269 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4270 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4271 4272 for (i = 0; i < m; i++) { 4273 if (PetscDefined(USE_DEBUG)) { 4274 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4275 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4276 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4277 } 4278 } 4279 nnz = Ii[i + 1] - Ii[i]; 4280 Iii = Ii[i]; 4281 ldi = ld[i]; 4282 md = Adi[i + 1] - Adi[i]; 4283 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4284 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4285 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4286 ad += md; 4287 ao += nnz - md; 4288 } 4289 nooffprocentries = mat->nooffprocentries; 4290 mat->nooffprocentries = PETSC_TRUE; 4291 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4292 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4293 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4294 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4295 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4296 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4297 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4298 mat->nooffprocentries = nooffprocentries; 4299 PetscFunctionReturn(PETSC_SUCCESS); 4300 } 4301 4302 /*@ 4303 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4304 4305 Collective 4306 4307 Input Parameters: 4308 + mat - the matrix 4309 - v - matrix values, stored by row 4310 4311 Level: intermediate 4312 4313 Notes: 4314 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4315 4316 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4317 4318 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4319 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4320 @*/ 4321 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4322 { 4323 PetscInt nnz, i, m; 4324 PetscBool nooffprocentries; 4325 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4326 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4327 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4328 PetscScalar *ad, *ao; 4329 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4330 PetscInt ldi, Iii, md; 4331 PetscInt *ld = Aij->ld; 4332 4333 PetscFunctionBegin; 4334 m = mat->rmap->n; 4335 4336 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4337 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4338 Iii = 0; 4339 for (i = 0; i < m; i++) { 4340 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4341 ldi = ld[i]; 4342 md = Adi[i + 1] - Adi[i]; 4343 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4344 ad += md; 4345 if (ao) { 4346 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4347 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4348 ao += nnz - md; 4349 } 4350 Iii += nnz; 4351 } 4352 nooffprocentries = mat->nooffprocentries; 4353 mat->nooffprocentries = PETSC_TRUE; 4354 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4355 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4356 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4357 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4358 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4359 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4360 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4361 mat->nooffprocentries = nooffprocentries; 4362 PetscFunctionReturn(PETSC_SUCCESS); 4363 } 4364 4365 /*@ 4366 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4367 (the default parallel PETSc format). For good matrix assembly performance 4368 the user should preallocate the matrix storage by setting the parameters 4369 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4370 4371 Collective 4372 4373 Input Parameters: 4374 + comm - MPI communicator 4375 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4376 This value should be the same as the local size used in creating the 4377 y vector for the matrix-vector product y = Ax. 4378 . n - This value should be the same as the local size used in creating the 4379 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4380 calculated if N is given) For square matrices n is almost always m. 4381 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4382 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4383 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4384 (same value is used for all local rows) 4385 . d_nnz - array containing the number of nonzeros in the various rows of the 4386 DIAGONAL portion of the local submatrix (possibly different for each row) 4387 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4388 The size of this array is equal to the number of local rows, i.e 'm'. 4389 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4390 submatrix (same value is used for all local rows). 4391 - o_nnz - array containing the number of nonzeros in the various rows of the 4392 OFF-DIAGONAL portion of the local submatrix (possibly different for 4393 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4394 structure. The size of this array is equal to the number 4395 of local rows, i.e. `m`. 4396 4397 Output Parameter: 4398 . A - the matrix 4399 4400 Options Database Keys: 4401 + -mat_no_inode - Do not use inodes 4402 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4403 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4404 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4405 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4406 4407 Level: intermediate 4408 4409 Notes: 4410 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4411 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4412 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4413 4414 If the *_nnz parameter is given then the *_nz parameter is ignored 4415 4416 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4417 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4418 storage requirements for this matrix. 4419 4420 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4421 processor then it must be used on all processors that share the object for 4422 that argument. 4423 4424 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4425 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4426 4427 The user MUST specify either the local or global matrix dimensions 4428 (possibly both). 4429 4430 The parallel matrix is partitioned across processors such that the 4431 first `m0` rows belong to process 0, the next `m1` rows belong to 4432 process 1, the next `m2` rows belong to process 2, etc., where 4433 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. That is, each MPI process stores 4434 values corresponding to an [m x N] submatrix. 4435 4436 The columns are logically partitioned with the first n0 columns belonging 4437 to the 0th partition, the next n1 columns belonging to the next 4438 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4439 4440 The DIAGONAL portion of the local submatrix on any given processor 4441 is the submatrix corresponding to the rows and columns m,n 4442 corresponding to the given processor, i.e. the diagonal matrix on 4443 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 4444 etc. The remaining portion of the local submatrix [m x (N-n)] 4445 constitutes the OFF-DIAGONAL portion. The example below better 4446 illustrates this concept. 4447 4448 For a square global matrix we define each processor's diagonal portion 4449 to be its local rows and the corresponding columns (a square submatrix); 4450 each processor's off-diagonal portion encompasses the remainder of the 4451 local matrix (a rectangular submatrix).
4452 4453 If `o_nnz`, `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4454 4455 When calling this routine with a single process communicator, a matrix of 4456 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4457 type of communicator, use the construction mechanism 4458 .vb 4459 MatCreate(..., &A); 4460 MatSetType(A, MATMPIAIJ); 4461 MatSetSizes(A, m, n, M, N); 4462 MatMPIAIJSetPreallocation(A, ...); 4463 .ve 4464 4465 By default, this format uses inodes (identical nodes) when possible. 4466 We search for consecutive rows with the same nonzero structure, thereby 4467 reusing matrix information to achieve increased efficiency. 4468 4469 Example Usage: 4470 Consider the following 8x8 matrix with 34 nonzero values that is 4471 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4472 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4473 as follows 4474 4475 .vb 4476 1 2 0 | 0 3 0 | 0 4 4477 Proc0 0 5 6 | 7 0 0 | 8 0 4478 9 0 10 | 11 0 0 | 12 0 4479 ------------------------------------- 4480 13 0 14 | 15 16 17 | 0 0 4481 Proc1 0 18 0 | 19 20 21 | 0 0 4482 0 0 0 | 22 23 0 | 24 0 4483 ------------------------------------- 4484 Proc2 25 26 27 | 0 0 28 | 29 0 4485 30 0 0 | 31 32 33 | 0 34 4486 .ve 4487 4488 This can be represented as a collection of submatrices as 4489 4490 .vb 4491 A B C 4492 D E F 4493 G H I 4494 .ve 4495 4496 Here the submatrices A,B,C are owned by proc0, D,E,F are 4497 owned by proc1, and G,H,I are owned by proc2. 4498 4499 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4500 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4501 The 'M','N' parameters are 8,8, and have the same values on all procs. 4502 4503 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4504 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4505 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4506 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4507 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4508 matrix, and [DF] as another `MATSEQAIJ` matrix. 4509 4510 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4511 allocated for every row of the local diagonal submatrix, and `o_nz` 4512 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4513 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per 4514 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4515 In this case, the values of `d_nz`,`o_nz` are 4516 .vb 4517 proc0 d_nz = 2, o_nz = 2 4518 proc1 d_nz = 3, o_nz = 2 4519 proc2 d_nz = 1, o_nz = 4 4520 .ve 4521 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4522 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4523 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4524 34 values. 4525 4526 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4527 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4528 In the above case the values for d_nnz,o_nnz are 4529 .vb 4530 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4531 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4532 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4533 .ve 4534 Here the space allocated is the sum of all the above values, i.e. 34, and 4535 hence the preallocation is perfect.
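   A call matching the example above could look like the following sketch (illustrative only; each process passes its own local sizes and its own `d_nnz`/`o_nnz` arrays, shown here for proc0, and values are then inserted and assembled in the usual way):
.vb
   Mat      A;
   PetscInt d_nnz[] = {2, 2, 2}; /* nonzeros per row of the DIAGONAL block on proc0     */
   PetscInt o_nnz[] = {2, 2, 2}; /* nonzeros per row of the OFF-DIAGONAL block on proc0 */

   MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve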
4536 4537 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4538 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4539 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4540 @*/ 4541 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4542 { 4543 PetscMPIInt size; 4544 4545 PetscFunctionBegin; 4546 PetscCall(MatCreate(comm, A)); 4547 PetscCall(MatSetSizes(*A, m, n, M, N)); 4548 PetscCallMPI(MPI_Comm_size(comm, &size)); 4549 if (size > 1) { 4550 PetscCall(MatSetType(*A, MATMPIAIJ)); 4551 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4552 } else { 4553 PetscCall(MatSetType(*A, MATSEQAIJ)); 4554 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4555 } 4556 PetscFunctionReturn(PETSC_SUCCESS); 4557 } 4558 4559 /*MC 4560 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4561 4562 Synopsis: 4563 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4564 4565 Not Collective 4566 4567 Input Parameter: 4568 . A - the `MATMPIAIJ` matrix 4569 4570 Output Parameters: 4571 + Ad - the diagonal portion of the matrix 4572 . Ao - the off-diagonal portion of the matrix 4573 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4574 - ierr - error code 4575 4576 Level: advanced 4577 4578 Note: 4579 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4580 4581 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4582 M*/ 4583 4584 /*MC 4585 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4586 4587 Synopsis: 4588 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4589 4590 Not Collective 4591 4592 Input Parameters: 4593 + A - the `MATMPIAIJ` matrix 4594 . Ad - the diagonal portion of the matrix 4595 . Ao - the off-diagonal portion of the matrix 4596 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4597 - ierr - error code 4598 4599 Level: advanced 4600 4601 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4602 M*/ 4603 4604 /*@C 4605 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4606 4607 Not Collective 4608 4609 Input Parameter: 4610 . A - The `MATMPIAIJ` matrix 4611 4612 Output Parameters: 4613 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4614 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4615 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4616 4617 Level: intermediate 4618 4619 Note: 4620 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4621 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4622 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4623 local column numbers to global column numbers in the original matrix. 4624 4625 Fortran Notes: 4626 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4627 4628 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4629 @*/ 4630 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4631 { 4632 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4633 PetscBool flg; 4634 4635 PetscFunctionBegin; 4636 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4637 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4638 if (Ad) *Ad = a->A; 4639 if (Ao) *Ao = a->B; 4640 if (colmap) *colmap = a->garray; 4641 PetscFunctionReturn(PETSC_SUCCESS); 4642 } 4643 4644 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4645 { 4646 PetscInt m, N, i, rstart, nnz, Ii; 4647 PetscInt *indx; 4648 PetscScalar *values; 4649 MatType rootType; 4650 4651 PetscFunctionBegin; 4652 PetscCall(MatGetSize(inmat, &m, &N)); 4653 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4654 PetscInt *dnz, *onz, sum, bs, cbs; 4655 4656 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4657 /* Check sum(n) = N */ 4658 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4659 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4660 4661 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4662 rstart -= m; 4663 4664 MatPreallocateBegin(comm, m, n, dnz, onz); 4665 for (i = 0; i < m; i++) { 4666 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4667 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4668 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4669 } 4670 4671 PetscCall(MatCreate(comm, outmat)); 4672 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4673 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4674 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4675 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4676 PetscCall(MatSetType(*outmat, rootType)); 4677 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4678 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4679 MatPreallocateEnd(dnz, onz); 4680 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4681 } 4682 4683 /* numeric phase */ 4684 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4685 for (i = 0; i < m; i++) { 4686 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4687 Ii = i + rstart; 4688 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4689 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4690 } 4691 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4692 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4693 PetscFunctionReturn(PETSC_SUCCESS); 4694 } 4695 4696 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4697 { 4698 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4699 4700 PetscFunctionBegin; 4701 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4702 PetscCall(PetscFree(merge->id_r)); 4703 PetscCall(PetscFree(merge->len_s)); 4704 
PetscCall(PetscFree(merge->len_r)); 4705 PetscCall(PetscFree(merge->bi)); 4706 PetscCall(PetscFree(merge->bj)); 4707 PetscCall(PetscFree(merge->buf_ri[0])); 4708 PetscCall(PetscFree(merge->buf_ri)); 4709 PetscCall(PetscFree(merge->buf_rj[0])); 4710 PetscCall(PetscFree(merge->buf_rj)); 4711 PetscCall(PetscFree(merge->coi)); 4712 PetscCall(PetscFree(merge->coj)); 4713 PetscCall(PetscFree(merge->owners_co)); 4714 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4715 PetscCall(PetscFree(merge)); 4716 PetscFunctionReturn(PETSC_SUCCESS); 4717 } 4718 4719 #include <../src/mat/utils/freespace.h> 4720 #include <petscbt.h> 4721 4722 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4723 { 4724 MPI_Comm comm; 4725 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4726 PetscMPIInt size, rank, taga, *len_s; 4727 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4728 PetscMPIInt proc, k; 4729 PetscInt **buf_ri, **buf_rj; 4730 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4731 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4732 MPI_Request *s_waits, *r_waits; 4733 MPI_Status *status; 4734 const MatScalar *aa, *a_a; 4735 MatScalar **abuf_r, *ba_i; 4736 Mat_Merge_SeqsToMPI *merge; 4737 PetscContainer container; 4738 4739 PetscFunctionBegin; 4740 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4741 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4742 4743 PetscCallMPI(MPI_Comm_size(comm, &size)); 4744 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4745 4746 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4747 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4748 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4749 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4750 aa = a_a; 4751 4752 bi = merge->bi; 4753 bj = merge->bj; 4754 buf_ri = merge->buf_ri; 4755 buf_rj = merge->buf_rj; 4756 4757 PetscCall(PetscMalloc1(size, &status)); 4758 owners = merge->rowmap->range; 4759 len_s = merge->len_s; 4760 4761 /* send and recv matrix values */ 4762 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4763 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4764 4765 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4766 for (proc = 0, k = 0; proc < size; proc++) { 4767 if (!len_s[proc]) continue; 4768 i = owners[proc]; 4769 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4770 k++; 4771 } 4772 4773 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4774 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4775 PetscCall(PetscFree(status)); 4776 4777 PetscCall(PetscFree(s_waits)); 4778 PetscCall(PetscFree(r_waits)); 4779 4780 /* insert mat values of mpimat */ 4781 PetscCall(PetscMalloc1(N, &ba_i)); 4782 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4783 4784 for (k = 0; k < merge->nrecv; k++) { 4785 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4786 nrows = *buf_ri_k[k]; 4787 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4788 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4789 } 4790 4791 /* set values of ba */ 4792 m = merge->rowmap->n; 4793 for (i = 0; i < m; i++) { 4794 arow = owners[rank] + 
i; 4795 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4796 bnzi = bi[i + 1] - bi[i]; 4797 PetscCall(PetscArrayzero(ba_i, bnzi)); 4798 4799 /* add local non-zero vals of this proc's seqmat into ba */ 4800 anzi = ai[arow + 1] - ai[arow]; 4801 aj = a->j + ai[arow]; 4802 aa = a_a + ai[arow]; 4803 nextaj = 0; 4804 for (j = 0; nextaj < anzi; j++) { 4805 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4806 ba_i[j] += aa[nextaj++]; 4807 } 4808 } 4809 4810 /* add received vals into ba */ 4811 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4812 /* i-th row */ 4813 if (i == *nextrow[k]) { 4814 anzi = *(nextai[k] + 1) - *nextai[k]; 4815 aj = buf_rj[k] + *nextai[k]; 4816 aa = abuf_r[k] + *nextai[k]; 4817 nextaj = 0; 4818 for (j = 0; nextaj < anzi; j++) { 4819 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4820 ba_i[j] += aa[nextaj++]; 4821 } 4822 } 4823 nextrow[k]++; 4824 nextai[k]++; 4825 } 4826 } 4827 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4828 } 4829 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4830 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4831 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4832 4833 PetscCall(PetscFree(abuf_r[0])); 4834 PetscCall(PetscFree(abuf_r)); 4835 PetscCall(PetscFree(ba_i)); 4836 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4837 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4838 PetscFunctionReturn(PETSC_SUCCESS); 4839 } 4840 4841 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4842 { 4843 Mat B_mpi; 4844 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4845 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4846 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4847 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4848 PetscInt len, *dnz, *onz, bs, cbs; 4849 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4850 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4851 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4852 MPI_Status *status; 4853 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4854 PetscBT lnkbt; 4855 Mat_Merge_SeqsToMPI *merge; 4856 PetscContainer container; 4857 4858 PetscFunctionBegin; 4859 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4860 4861 /* make sure it is a PETSc comm */ 4862 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4863 PetscCallMPI(MPI_Comm_size(comm, &size)); 4864 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4865 4866 PetscCall(PetscNew(&merge)); 4867 PetscCall(PetscMalloc1(size, &status)); 4868 4869 /* determine row ownership */ 4870 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4871 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4872 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4873 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4874 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4875 PetscCall(PetscMalloc1(size, &len_si)); 4876 PetscCall(PetscMalloc1(size, &merge->len_s)); 4877 4878 m = merge->rowmap->n; 4879 owners = merge->rowmap->range; 4880 4881 /* determine the number of messages to send, their lengths */ 4882 len_s = merge->len_s; 4883 4884 len = 0; /* length of buf_si[] */ 4885 merge->nsend = 0; 4886 for (PetscMPIInt proc = 0; proc < size; proc++) { 4887 len_si[proc] = 0; 4888 if (proc == rank) { 4889 len_s[proc] = 0; 4890 } else { 4891 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4892 
PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4893 } 4894 if (len_s[proc]) { 4895 merge->nsend++; 4896 nrows = 0; 4897 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4898 if (ai[i + 1] > ai[i]) nrows++; 4899 } 4900 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4901 len += len_si[proc]; 4902 } 4903 } 4904 4905 /* determine the number and length of messages to receive for ij-structure */ 4906 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4907 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4908 4909 /* post the Irecv of j-structure */ 4910 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4911 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4912 4913 /* post the Isend of j-structure */ 4914 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4915 4916 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4917 if (!len_s[proc]) continue; 4918 i = owners[proc]; 4919 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4920 k++; 4921 } 4922 4923 /* receives and sends of j-structure are complete */ 4924 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4925 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4926 4927 /* send and recv i-structure */ 4928 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4929 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4930 4931 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4932 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4933 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4934 if (!len_s[proc]) continue; 4935 /* form outgoing message for i-structure: 4936 buf_si[0]: nrows to be sent 4937 [1:nrows]: row index (global) 4938 [nrows+1:2*nrows+1]: i-structure index 4939 */ 4940 nrows = len_si[proc] / 2 - 1; 4941 buf_si_i = buf_si + nrows + 1; 4942 buf_si[0] = nrows; 4943 buf_si_i[0] = 0; 4944 nrows = 0; 4945 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4946 anzi = ai[i + 1] - ai[i]; 4947 if (anzi) { 4948 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4949 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4950 nrows++; 4951 } 4952 } 4953 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4954 k++; 4955 buf_si += len_si[proc]; 4956 } 4957 4958 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4959 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4960 4961 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4962 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4963 4964 PetscCall(PetscFree(len_si)); 4965 PetscCall(PetscFree(len_ri)); 4966 PetscCall(PetscFree(rj_waits)); 4967 PetscCall(PetscFree2(si_waits, sj_waits)); 4968 PetscCall(PetscFree(ri_waits)); 4969 PetscCall(PetscFree(buf_s)); 4970 PetscCall(PetscFree(status)); 4971 4972 /* compute a local seq matrix in each processor */ 4973 /* allocate bi array and free space for accumulating nonzero column info */ 4974 PetscCall(PetscMalloc1(m + 1, &bi)); 4975 bi[0] = 0; 4976 4977 /* create and initialize a linked list */ 4978 
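/* lnk[] together with the bit table lnkbt acts as a sorted linked list over the global columns: for one row at a time it accumulates the union of the local and the received column indices (filled with PetscLLAddSorted() and flushed into the free-space buffer with PetscLLClean() below) */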
nlnk = N + 1; 4979 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4980 4981 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4982 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4983 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4984 4985 current_space = free_space; 4986 4987 /* determine symbolic info for each local row */ 4988 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4989 4990 for (k = 0; k < merge->nrecv; k++) { 4991 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4992 nrows = *buf_ri_k[k]; 4993 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4994 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4995 } 4996 4997 MatPreallocateBegin(comm, m, n, dnz, onz); 4998 len = 0; 4999 for (i = 0; i < m; i++) { 5000 bnzi = 0; 5001 /* add local non-zero cols of this proc's seqmat into lnk */ 5002 arow = owners[rank] + i; 5003 anzi = ai[arow + 1] - ai[arow]; 5004 aj = a->j + ai[arow]; 5005 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5006 bnzi += nlnk; 5007 /* add received col data into lnk */ 5008 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5009 if (i == *nextrow[k]) { /* i-th row */ 5010 anzi = *(nextai[k] + 1) - *nextai[k]; 5011 aj = buf_rj[k] + *nextai[k]; 5012 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5013 bnzi += nlnk; 5014 nextrow[k]++; 5015 nextai[k]++; 5016 } 5017 } 5018 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5019 5020 /* if free space is not available, make more free space */ 5021 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5022 /* copy data into free space, then initialize lnk */ 5023 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5024 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5025 5026 current_space->array += bnzi; 5027 current_space->local_used += bnzi; 5028 current_space->local_remaining -= bnzi; 5029 5030 bi[i + 1] = bi[i] + bnzi; 5031 } 5032 5033 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5034 5035 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5036 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5037 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5038 5039 /* create symbolic parallel matrix B_mpi */ 5040 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5041 PetscCall(MatCreate(comm, &B_mpi)); 5042 if (n == PETSC_DECIDE) { 5043 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5044 } else { 5045 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5046 } 5047 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5048 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5049 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5050 MatPreallocateEnd(dnz, onz); 5051 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5052 5053 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5054 B_mpi->assembled = PETSC_FALSE; 5055 merge->bi = bi; 5056 merge->bj = bj; 5057 merge->buf_ri = buf_ri; 5058 merge->buf_rj = buf_rj; 5059 merge->coi = NULL; 5060 merge->coj = NULL; 5061 merge->owners_co = NULL; 5062 5063 PetscCall(PetscCommDestroy(&comm)); 5064 5065 /* attach the supporting struct to B_mpi for reuse */ 5066 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5067 
PetscCall(PetscContainerSetPointer(container, merge)); 5068 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5069 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5070 PetscCall(PetscContainerDestroy(&container)); 5071 *mpimat = B_mpi; 5072 5073 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5074 PetscFunctionReturn(PETSC_SUCCESS); 5075 } 5076 5077 /*@ 5078 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5079 matrices from each processor 5080 5081 Collective 5082 5083 Input Parameters: 5084 + comm - the communicator the parallel matrix will live on 5085 . seqmat - the input sequential matrix 5086 . m - number of local rows (or `PETSC_DECIDE`) 5087 . n - number of local columns (or `PETSC_DECIDE`) 5088 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5089 5090 Output Parameter: 5091 . mpimat - the parallel matrix generated 5092 5093 Level: advanced 5094 5095 Note: 5096 The dimensions of the sequential matrix in each processor MUST be the same. 5097 The input `seqmat` is included in the container "Mat_Merge_SeqsToMPI" and will be 5098 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5099 5100 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5101 @*/ 5102 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5103 { 5104 PetscMPIInt size; 5105 5106 PetscFunctionBegin; 5107 PetscCallMPI(MPI_Comm_size(comm, &size)); 5108 if (size == 1) { 5109 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5110 if (scall == MAT_INITIAL_MATRIX) { 5111 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5112 } else { 5113 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5114 } 5115 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5116 PetscFunctionReturn(PETSC_SUCCESS); 5117 } 5118 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5119 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5120 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5121 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5122 PetscFunctionReturn(PETSC_SUCCESS); 5123 } 5124 5125 /*@ 5126 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5127 5128 Not Collective 5129 5130 Input Parameter: 5131 . A - the matrix 5132 5133 Output Parameter: 5134 . A_loc - the local sequential matrix generated 5135 5136 Level: developer 5137 5138 Notes: 5139 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5140 with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and 5141 `n` is the global column count obtained with `MatGetSize()`. 5142 5143 In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5144 5145 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
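   A typical use is the following sketch (error checking omitted):
.vb
   Mat A_loc;
   MatAIJGetLocalMat(A, &A_loc);
   /* ... use the local rows of A as a sequential matrix ... */
   MatDestroy(&A_loc);
.ve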
5146 5147 Destroy the matrix with `MatDestroy()` 5148 5149 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5150 @*/ 5151 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5152 { 5153 PetscBool mpi; 5154 5155 PetscFunctionBegin; 5156 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5157 if (mpi) { 5158 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5159 } else { 5160 *A_loc = A; 5161 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5162 } 5163 PetscFunctionReturn(PETSC_SUCCESS); 5164 } 5165 5166 /*@ 5167 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5168 5169 Not Collective 5170 5171 Input Parameters: 5172 + A - the matrix 5173 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5174 5175 Output Parameter: 5176 . A_loc - the local sequential matrix generated 5177 5178 Level: developer 5179 5180 Notes: 5181 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5182 matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with 5183 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5184 5185 In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5186 5187 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5188 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5189 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5190 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
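   The reuse pattern described above might, for instance, look like this sketch:
.vb
   Mat A_loc;
   MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
   /* ... the numerical values of A change, the nonzero pattern does not ... */
   MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);
   MatDestroy(&A_loc);
.ve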
5191 5192 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5193 @*/ 5194 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5195 { 5196 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5197 Mat_SeqAIJ *mat, *a, *b; 5198 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5199 const PetscScalar *aa, *ba, *aav, *bav; 5200 PetscScalar *ca, *cam; 5201 PetscMPIInt size; 5202 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5203 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5204 PetscBool match; 5205 5206 PetscFunctionBegin; 5207 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5208 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5209 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5210 if (size == 1) { 5211 if (scall == MAT_INITIAL_MATRIX) { 5212 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5213 *A_loc = mpimat->A; 5214 } else if (scall == MAT_REUSE_MATRIX) { 5215 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5216 } 5217 PetscFunctionReturn(PETSC_SUCCESS); 5218 } 5219 5220 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5221 a = (Mat_SeqAIJ *)mpimat->A->data; 5222 b = (Mat_SeqAIJ *)mpimat->B->data; 5223 ai = a->i; 5224 aj = a->j; 5225 bi = b->i; 5226 bj = b->j; 5227 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5228 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5229 aa = aav; 5230 ba = bav; 5231 if (scall == MAT_INITIAL_MATRIX) { 5232 PetscCall(PetscMalloc1(1 + am, &ci)); 5233 ci[0] = 0; 5234 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5235 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5236 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5237 k = 0; 5238 for (i = 0; i < am; i++) { 5239 ncols_o = bi[i + 1] - bi[i]; 5240 ncols_d = ai[i + 1] - ai[i]; 5241 /* off-diagonal portion of A */ 5242 for (jo = 0; jo < ncols_o; jo++) { 5243 col = cmap[*bj]; 5244 if (col >= cstart) break; 5245 cj[k] = col; 5246 bj++; 5247 ca[k++] = *ba++; 5248 } 5249 /* diagonal portion of A */ 5250 for (j = 0; j < ncols_d; j++) { 5251 cj[k] = cstart + *aj++; 5252 ca[k++] = *aa++; 5253 } 5254 /* off-diagonal portion of A */ 5255 for (j = jo; j < ncols_o; j++) { 5256 cj[k] = cmap[*bj++]; 5257 ca[k++] = *ba++; 5258 } 5259 } 5260 /* put together the new matrix */ 5261 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5262 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5263 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5264 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5265 mat->free_a = PETSC_TRUE; 5266 mat->free_ij = PETSC_TRUE; 5267 mat->nonew = 0; 5268 } else if (scall == MAT_REUSE_MATRIX) { 5269 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5270 ci = mat->i; 5271 cj = mat->j; 5272 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5273 for (i = 0; i < am; i++) { 5274 /* off-diagonal portion of A */ 5275 ncols_o = bi[i + 1] - bi[i]; 5276 for (jo = 0; jo < ncols_o; jo++) { 5277 col = cmap[*bj]; 5278 if (col >= cstart) break; 5279 *cam++ = *ba++; 5280 bj++; 5281 } 5282 /* diagonal portion of A */ 5283 ncols_d = ai[i + 1] - ai[i]; 5284 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5285 /* off-diagonal portion of A */ 5286 for (j = jo; j < ncols_o; j++) { 5287 *cam++ = *ba++; 5288 bj++; 5289 } 5290 } 5291 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5292 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5293 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5294 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5295 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5296 PetscFunctionReturn(PETSC_SUCCESS); 5297 } 5298 5299 /*@ 5300 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5301 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5302 5303 Not Collective 5304 5305 Input Parameters: 5306 + A - the matrix 5307 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5308 5309 Output Parameters: 5310 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5311 - A_loc - the local sequential matrix generated 5312 5313 Level: developer 5314 5315 Note: 5316 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5317 part, then those associated with the off-diagonal part (in its local ordering) 5318 5319 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5320 @*/ 5321 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5322 { 5323 Mat Ao, Ad; 5324 const PetscInt *cmap; 5325 PetscMPIInt size; 5326 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5327 5328 PetscFunctionBegin; 5329 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5330 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5331 if (size == 1) { 5332 if (scall == MAT_INITIAL_MATRIX) { 5333 PetscCall(PetscObjectReference((PetscObject)Ad)); 5334 *A_loc = Ad; 5335 } else if (scall == MAT_REUSE_MATRIX) { 5336 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5337 } 5338 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5339 PetscFunctionReturn(PETSC_SUCCESS); 5340 } 5341 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5342 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5343 if (f) { 5344 PetscCall((*f)(A, scall, glob, A_loc)); 5345 } else { 5346 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5347 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5348 Mat_SeqAIJ *c; 5349 PetscInt *ai = a->i, *aj = a->j; 5350 PetscInt *bi = b->i, *bj = b->j; 5351 PetscInt *ci, *cj; 5352 const PetscScalar *aa, *ba; 5353 PetscScalar *ca; 5354 PetscInt i, j, am, dn, on; 5355 5356 
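/* Fallback used when the matrix type does not provide a MatMPIAIJGetLocalMatMerge_C implementation: each local row of the diagonal block Ad is concatenated with the corresponding row of the off-diagonal block Ao, with Ao's column indices shifted by the number of diagonal columns */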
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5357 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5358 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5359 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5360 if (scall == MAT_INITIAL_MATRIX) { 5361 PetscInt k; 5362 PetscCall(PetscMalloc1(1 + am, &ci)); 5363 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5364 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5365 ci[0] = 0; 5366 for (i = 0, k = 0; i < am; i++) { 5367 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5368 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5369 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5370 /* diagonal portion of A */ 5371 for (j = 0; j < ncols_d; j++, k++) { 5372 cj[k] = *aj++; 5373 ca[k] = *aa++; 5374 } 5375 /* off-diagonal portion of A */ 5376 for (j = 0; j < ncols_o; j++, k++) { 5377 cj[k] = dn + *bj++; 5378 ca[k] = *ba++; 5379 } 5380 } 5381 /* put together the new matrix */ 5382 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5383 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5384 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5385 c = (Mat_SeqAIJ *)(*A_loc)->data; 5386 c->free_a = PETSC_TRUE; 5387 c->free_ij = PETSC_TRUE; 5388 c->nonew = 0; 5389 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5390 } else if (scall == MAT_REUSE_MATRIX) { 5391 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5392 for (i = 0; i < am; i++) { 5393 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5394 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5395 /* diagonal portion of A */ 5396 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5397 /* off-diagonal portion of A */ 5398 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5399 } 5400 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5401 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5402 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5403 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5404 if (glob) { 5405 PetscInt cst, *gidx; 5406 5407 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5408 PetscCall(PetscMalloc1(dn + on, &gidx)); 5409 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5410 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5411 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5412 } 5413 } 5414 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5415 PetscFunctionReturn(PETSC_SUCCESS); 5416 } 5417 5418 /*@C 5419 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5420 5421 Not Collective 5422 5423 Input Parameters: 5424 + A - the matrix 5425 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5426 . row - index set of rows to extract (or `NULL`) 5427 - col - index set of columns to extract (or `NULL`) 5428 5429 Output Parameter: 5430 . 
A_loc - the local sequential matrix generated 5431 5432 Level: developer 5433 5434 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5435 @*/ 5436 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5437 { 5438 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5439 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5440 IS isrowa, iscola; 5441 Mat *aloc; 5442 PetscBool match; 5443 5444 PetscFunctionBegin; 5445 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5446 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5447 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5448 if (!row) { 5449 start = A->rmap->rstart; 5450 end = A->rmap->rend; 5451 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5452 } else { 5453 isrowa = *row; 5454 } 5455 if (!col) { 5456 start = A->cmap->rstart; 5457 cmap = a->garray; 5458 nzA = a->A->cmap->n; 5459 nzB = a->B->cmap->n; 5460 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5461 ncols = 0; 5462 for (i = 0; i < nzB; i++) { 5463 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5464 else break; 5465 } 5466 imark = i; 5467 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5468 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5469 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5470 } else { 5471 iscola = *col; 5472 } 5473 if (scall != MAT_INITIAL_MATRIX) { 5474 PetscCall(PetscMalloc1(1, &aloc)); 5475 aloc[0] = *A_loc; 5476 } 5477 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5478 if (!col) { /* attach global id of condensed columns */ 5479 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5480 } 5481 *A_loc = aloc[0]; 5482 PetscCall(PetscFree(aloc)); 5483 if (!row) PetscCall(ISDestroy(&isrowa)); 5484 if (!col) PetscCall(ISDestroy(&iscola)); 5485 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5486 PetscFunctionReturn(PETSC_SUCCESS); 5487 } 5488 5489 /* 5490 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5491 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5492 * on a global size. 
5493 * */ 5494 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5495 { 5496 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5497 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5498 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5499 PetscMPIInt owner; 5500 PetscSFNode *iremote, *oiremote; 5501 const PetscInt *lrowindices; 5502 PetscSF sf, osf; 5503 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5504 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5505 MPI_Comm comm; 5506 ISLocalToGlobalMapping mapping; 5507 const PetscScalar *pd_a, *po_a; 5508 5509 PetscFunctionBegin; 5510 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5511 /* plocalsize is the number of roots 5512 * nrows is the number of leaves 5513 * */ 5514 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5515 PetscCall(ISGetLocalSize(rows, &nrows)); 5516 PetscCall(PetscCalloc1(nrows, &iremote)); 5517 PetscCall(ISGetIndices(rows, &lrowindices)); 5518 for (i = 0; i < nrows; i++) { 5519 /* Find a remote index and an owner for a row 5520 * The row could be local or remote 5521 * */ 5522 owner = 0; 5523 lidx = 0; 5524 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5525 iremote[i].index = lidx; 5526 iremote[i].rank = owner; 5527 } 5528 /* Create SF to communicate how many nonzero columns for each row */ 5529 PetscCall(PetscSFCreate(comm, &sf)); 5530 /* SF will figure out the number of nonzero columns for each row, and their 5531 * offsets 5532 * */ 5533 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5534 PetscCall(PetscSFSetFromOptions(sf)); 5535 PetscCall(PetscSFSetUp(sf)); 5536 5537 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5538 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5539 PetscCall(PetscCalloc1(nrows, &pnnz)); 5540 roffsets[0] = 0; 5541 roffsets[1] = 0; 5542 for (i = 0; i < plocalsize; i++) { 5543 /* diagonal */ 5544 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5545 /* off-diagonal */ 5546 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5547 /* compute offsets so that we relative location for each row */ 5548 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5549 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5550 } 5551 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5552 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5553 /* 'r' means root, and 'l' means leaf */ 5554 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5555 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5556 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5557 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5558 PetscCall(PetscSFDestroy(&sf)); 5559 PetscCall(PetscFree(roffsets)); 5560 PetscCall(PetscFree(nrcols)); 5561 dntotalcols = 0; 5562 ontotalcols = 0; 5563 ncol = 0; 5564 for (i = 0; i < nrows; i++) { 5565 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5566 ncol = PetscMax(pnnz[i], ncol); 5567 /* diagonal */ 5568 dntotalcols += nlcols[i * 2 + 0]; 5569 /* off-diagonal */ 5570 ontotalcols += nlcols[i * 2 + 1]; 5571 } 5572 /* We do not need to figure the right number of columns 5573 * since all the calculations will be done by going through the raw data 5574 * */ 5575 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5576 PetscCall(MatSetUp(*P_oth)); 5577 
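/* Next, two star forests are built (one for the diagonal and one for the off-diagonal part of P): their roots are P's local nonzeros and their leaves are the nonzeros of P_oth; they are used below to broadcast column indices and numerical values from P into P_oth */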
PetscCall(PetscFree(pnnz)); 5578 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5579 /* diagonal */ 5580 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5581 /* off-diagonal */ 5582 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5583 /* diagonal */ 5584 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5585 /* off-diagonal */ 5586 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5587 dntotalcols = 0; 5588 ontotalcols = 0; 5589 ntotalcols = 0; 5590 for (i = 0; i < nrows; i++) { 5591 owner = 0; 5592 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5593 /* Set iremote for diag matrix */ 5594 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5595 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5596 iremote[dntotalcols].rank = owner; 5597 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5598 ilocal[dntotalcols++] = ntotalcols++; 5599 } 5600 /* off-diagonal */ 5601 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5602 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5603 oiremote[ontotalcols].rank = owner; 5604 oilocal[ontotalcols++] = ntotalcols++; 5605 } 5606 } 5607 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5608 PetscCall(PetscFree(loffsets)); 5609 PetscCall(PetscFree(nlcols)); 5610 PetscCall(PetscSFCreate(comm, &sf)); 5611 /* P serves as roots and P_oth is leaves 5612 * Diag matrix 5613 * */ 5614 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5615 PetscCall(PetscSFSetFromOptions(sf)); 5616 PetscCall(PetscSFSetUp(sf)); 5617 5618 PetscCall(PetscSFCreate(comm, &osf)); 5619 /* off-diagonal */ 5620 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5621 PetscCall(PetscSFSetFromOptions(osf)); 5622 PetscCall(PetscSFSetUp(osf)); 5623 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5624 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5625 /* operate on the matrix internal data to save memory */ 5626 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5627 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5628 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5629 /* Convert to global indices for diag matrix */ 5630 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5631 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5632 /* We want P_oth store global indices */ 5633 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5634 /* Use memory scalable approach */ 5635 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5636 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5637 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5638 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5639 /* Convert back to local indices */ 5640 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5641 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5642 nout = 0; 5643 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5644 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5645 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5646 /* Exchange values */ 5647 PetscCall(PetscSFBcastEnd(sf, 
MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5648 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5649 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5650 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5651 /* Stop PETSc from shrinking memory */ 5652 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5653 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5654 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5655 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5656 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5657 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5658 PetscCall(PetscSFDestroy(&sf)); 5659 PetscCall(PetscSFDestroy(&osf)); 5660 PetscFunctionReturn(PETSC_SUCCESS); 5661 } 5662 5663 /* 5664 * Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns of local A 5665 * This supports MPIAIJ and MAIJ 5666 * */ 5667 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5668 { 5669 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5670 Mat_SeqAIJ *p_oth; 5671 IS rows, map; 5672 PetscHMapI hamp; 5673 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5674 MPI_Comm comm; 5675 PetscSF sf, osf; 5676 PetscBool has; 5677 5678 PetscFunctionBegin; 5679 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5680 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5681 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5682 * and then create a submatrix (that often is an overlapping matrix) 5683 * */ 5684 if (reuse == MAT_INITIAL_MATRIX) { 5685 /* Use a hash table to figure out unique keys */ 5686 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5687 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5688 count = 0; 5689 /* Assume that a->garray is sorted, otherwise the following does not make sense */ 5690 for (i = 0; i < a->B->cmap->n; i++) { 5691 key = a->garray[i] / dof; 5692 PetscCall(PetscHMapIHas(hamp, key, &has)); 5693 if (!has) { 5694 mapping[i] = count; 5695 PetscCall(PetscHMapISet(hamp, key, count++)); 5696 } else { 5697 /* Current 'i' has the same key as the previous entry */ 5698 mapping[i] = count - 1; 5699 } 5700 } 5701 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5702 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5703 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5704 PetscCall(PetscCalloc1(htsize, &rowindices)); 5705 off = 0; 5706 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5707 PetscCall(PetscHMapIDestroy(&hamp)); 5708 PetscCall(PetscSortInt(htsize, rowindices)); 5709 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5710 /* In case the matrix was already created but the user wants to recreate it */ 5711 PetscCall(MatDestroy(P_oth)); 5712 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5713 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5714 PetscCall(ISDestroy(&map)); 5715 PetscCall(ISDestroy(&rows)); 5716 } else if (reuse == MAT_REUSE_MATRIX) { 5717 /* If the matrix was already created, we simply update values using SF objects 5718 * that were attached to the matrix earlier.
5719 */ 5720 const PetscScalar *pd_a, *po_a; 5721 5722 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5723 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5724 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5725 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5726 /* Update values in place */ 5727 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5728 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5729 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5730 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5731 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5732 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5733 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5734 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5735 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5736 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5737 PetscFunctionReturn(PETSC_SUCCESS); 5738 } 5739 5740 /*@C 5741 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5742 5743 Collective 5744 5745 Input Parameters: 5746 + A - the first matrix in `MATMPIAIJ` format 5747 . B - the second matrix in `MATMPIAIJ` format 5748 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5749 5750 Output Parameters: 5751 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5752 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5753 - B_seq - the sequential matrix generated 5754 5755 Level: developer 5756 5757 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5758 @*/ 5759 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5760 { 5761 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5762 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5763 IS isrowb, iscolb; 5764 Mat *bseq = NULL; 5765 5766 PetscFunctionBegin; 5767 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5768 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5769 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5770 5771 if (scall == MAT_INITIAL_MATRIX) { 5772 start = A->cmap->rstart; 5773 cmap = a->garray; 5774 nzA = a->A->cmap->n; 5775 nzB = a->B->cmap->n; 5776 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5777 ncols = 0; 5778 for (i = 0; i < nzB; i++) { /* row < local row index */ 5779 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5780 else break; 5781 } 5782 imark = i; 5783 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5784 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5785 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5786 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5787 } else { 5788 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5789 isrowb = *rowb; 5790 iscolb = *colb; 5791 PetscCall(PetscMalloc1(1, &bseq)); 5792 bseq[0] = *B_seq; 5793 } 5794 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5795 *B_seq = bseq[0]; 5796 PetscCall(PetscFree(bseq)); 
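/* hand the index sets back to the caller (so they can be passed in again with MAT_REUSE_MATRIX) or destroy them if the caller did not request them */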
5797 if (!rowb) { 5798 PetscCall(ISDestroy(&isrowb)); 5799 } else { 5800 *rowb = isrowb; 5801 } 5802 if (!colb) { 5803 PetscCall(ISDestroy(&iscolb)); 5804 } else { 5805 *colb = iscolb; 5806 } 5807 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5808 PetscFunctionReturn(PETSC_SUCCESS); 5809 } 5810 5811 /* 5812 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5813 of the OFF-DIAGONAL portion of local A 5814 5815 Collective 5816 5817 Input Parameters: 5818 + A,B - the matrices in `MATMPIAIJ` format 5819 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5820 5821 Output Parameter: 5822 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5823 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5824 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5825 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5826 5827 Developer Note: 5828 This directly accesses information inside the VecScatter associated with the matrix-vector product 5829 for this matrix. This is not desirable.. 5830 5831 Level: developer 5832 5833 */ 5834 5835 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5836 { 5837 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5838 VecScatter ctx; 5839 MPI_Comm comm; 5840 const PetscMPIInt *rprocs, *sprocs; 5841 PetscMPIInt nrecvs, nsends; 5842 const PetscInt *srow, *rstarts, *sstarts; 5843 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5844 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5845 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5846 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5847 PetscMPIInt size, tag, rank, nreqs; 5848 5849 PetscFunctionBegin; 5850 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5851 PetscCallMPI(MPI_Comm_size(comm, &size)); 5852 5853 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5854 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5855 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5856 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5857 5858 if (size == 1) { 5859 startsj_s = NULL; 5860 bufa_ptr = NULL; 5861 *B_oth = NULL; 5862 PetscFunctionReturn(PETSC_SUCCESS); 5863 } 5864 5865 ctx = a->Mvctx; 5866 tag = ((PetscObject)ctx)->tag; 5867 5868 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5869 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5870 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5871 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5872 PetscCall(PetscMalloc1(nreqs, &reqs)); 5873 rwaits = reqs; 5874 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5875 5876 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5877 if (scall == MAT_INITIAL_MATRIX) { 5878 /* i-array */ 5879 /* post receives */ 5880 if (nrecvs) PetscCall(PetscMalloc1(rbs * 
(rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5881 for (i = 0; i < nrecvs; i++) { 5882 rowlen = rvalues + rstarts[i] * rbs; 5883 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5884 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5885 } 5886 5887 /* pack the outgoing message */ 5888 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5889 5890 sstartsj[0] = 0; 5891 rstartsj[0] = 0; 5892 len = 0; /* total length of j or a array to be sent */ 5893 if (nsends) { 5894 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5895 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5896 } 5897 for (i = 0; i < nsends; i++) { 5898 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5899 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5900 for (j = 0; j < nrows; j++) { 5901 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5902 for (l = 0; l < sbs; l++) { 5903 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5904 5905 rowlen[j * sbs + l] = ncols; 5906 5907 len += ncols; 5908 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5909 } 5910 k++; 5911 } 5912 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5913 5914 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5915 } 5916 /* recvs and sends of i-array are completed */ 5917 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5918 PetscCall(PetscFree(svalues)); 5919 5920 /* allocate buffers for sending j and a arrays */ 5921 PetscCall(PetscMalloc1(len + 1, &bufj)); 5922 PetscCall(PetscMalloc1(len + 1, &bufa)); 5923 5924 /* create i-array of B_oth */ 5925 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5926 5927 b_othi[0] = 0; 5928 len = 0; /* total length of j or a array to be received */ 5929 k = 0; 5930 for (i = 0; i < nrecvs; i++) { 5931 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5932 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5933 for (j = 0; j < nrows; j++) { 5934 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5935 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5936 k++; 5937 } 5938 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5939 } 5940 PetscCall(PetscFree(rvalues)); 5941 5942 /* allocate space for j and a arrays of B_oth */ 5943 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5944 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5945 5946 /* j-array */ 5947 /* post receives of j-array */ 5948 for (i = 0; i < nrecvs; i++) { 5949 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5950 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5951 } 5952 5953 /* pack the outgoing message j-array */ 5954 if (nsends) k = sstarts[0]; 5955 for (i = 0; i < nsends; i++) { 5956 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5957 bufJ = bufj + sstartsj[i]; 5958 for (j = 0; j < nrows; j++) { 5959 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5960 for (ll = 0; ll < sbs; ll++) { 5961 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5962 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5963 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5964 } 5965 } 5966 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - 
sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5967 }
5968
5969 /* recvs and sends of j-array are completed */
5970 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5971 } else if (scall == MAT_REUSE_MATRIX) {
5972 sstartsj = *startsj_s;
5973 rstartsj = *startsj_r;
5974 bufa = *bufa_ptr;
5975 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5976 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Unknown reuse type");
5977
5978 /* a-array */
5979 /* post receives of a-array */
5980 for (i = 0; i < nrecvs; i++) {
5981 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5982 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5983 }
5984
5985 /* pack the outgoing message a-array */
5986 if (nsends) k = sstarts[0];
5987 for (i = 0; i < nsends; i++) {
5988 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5989 bufA = bufa + sstartsj[i];
5990 for (j = 0; j < nrows; j++) {
5991 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5992 for (ll = 0; ll < sbs; ll++) {
5993 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5994 for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5995 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5996 }
5997 }
5998 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5999 }
6000 /* recvs and sends of a-array are completed */
6001 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6002 PetscCall(PetscFree(reqs));
6003
6004 if (scall == MAT_INITIAL_MATRIX) {
6005 Mat_SeqAIJ *b_oth;
6006
6007 /* put together the new matrix */
6008 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6009
6010 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6011 /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 6012 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6013 b_oth->free_a = PETSC_TRUE; 6014 b_oth->free_ij = PETSC_TRUE; 6015 b_oth->nonew = 0; 6016 6017 PetscCall(PetscFree(bufj)); 6018 if (!startsj_s || !bufa_ptr) { 6019 PetscCall(PetscFree2(sstartsj, rstartsj)); 6020 PetscCall(PetscFree(bufa_ptr)); 6021 } else { 6022 *startsj_s = sstartsj; 6023 *startsj_r = rstartsj; 6024 *bufa_ptr = bufa; 6025 } 6026 } else if (scall == MAT_REUSE_MATRIX) { 6027 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6028 } 6029 6030 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6031 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6032 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6033 PetscFunctionReturn(PETSC_SUCCESS); 6034 } 6035 6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6039 #if defined(PETSC_HAVE_MKL_SPARSE) 6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6041 #endif 6042 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6044 #if defined(PETSC_HAVE_ELEMENTAL) 6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6046 #endif 6047 #if defined(PETSC_HAVE_SCALAPACK) 6048 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6049 #endif 6050 #if defined(PETSC_HAVE_HYPRE) 6051 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6052 #endif 6053 #if defined(PETSC_HAVE_CUDA) 6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6055 #endif 6056 #if defined(PETSC_HAVE_HIP) 6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6058 #endif 6059 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6061 #endif 6062 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6063 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6064 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6065 6066 /* 6067 Computes (B'*A')' since computing B*A directly is untenable 6068 6069 n p p 6070 [ ] [ ] [ ] 6071 m [ A ] * n [ B ] = m [ C ] 6072 [ ] [ ] [ ] 6073 6074 */ 6075 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6076 { 6077 Mat At, Bt, Ct; 6078 6079 PetscFunctionBegin; 6080 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6081 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6082 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6083 PetscCall(MatDestroy(&At)); 6084 PetscCall(MatDestroy(&Bt)); 6085 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6086 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6087 PetscCall(MatDestroy(&Ct)); 6088 PetscFunctionReturn(PETSC_SUCCESS); 6089 } 6090 6091 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6092 { 6093 PetscBool cisdense; 6094 6095 PetscFunctionBegin; 6096 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6097 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6098 PetscCall(MatSetBlockSizesFromMats(C, A, B));
6099 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6100 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6101 PetscCall(MatSetUp(C));
6102
6103 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6104 PetscFunctionReturn(PETSC_SUCCESS);
6105 }
6106
6107 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6108 {
6109 Mat_Product *product = C->product;
6110 Mat A = product->A, B = product->B;
6111
6112 PetscFunctionBegin;
6113 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6114 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6115 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6116 C->ops->productsymbolic = MatProductSymbolic_AB;
6117 PetscFunctionReturn(PETSC_SUCCESS);
6118 }
6119
6120 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6121 {
6122 Mat_Product *product = C->product;
6123
6124 PetscFunctionBegin;
6125 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6126 PetscFunctionReturn(PETSC_SUCCESS);
6127 }
6128
6129 /*
6130 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6131
6132 Input Parameters:
6133
6134 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6135 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6136
6137 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6138
6139 For Set1, j1[] contains column indices of the nonzeros.
6140 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6141 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6142 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6143
6144 Similarly for Set2.
6145
6146 This routine merges the two sets of nonzeros row by row and removes repeats.
6147
6148 Output Parameters: (memory is allocated by the caller)
6149
6150 i[],j[]: the CSR of the merged matrix, which has m rows.
6151 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix.
6152 imap2[]: similar to imap1[], but for Set2.
6153 Note we order nonzeros row-by-row and from left to right.
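  A small worked example for a single row (hypothetical data, shown only to illustrate the conventions above; assume this is row 0 so all indices start at 0):
    Set1 entries of the row in j1[]: 2,2,5  with jmap1 = [0,2,3]  (column 2 appears twice, column 5 once)
    Set2 entries of the row in j2[]: 2,7    with jmap2 = [0,1,2]  (columns 2 and 7 appear once each)
    merged row in j[]:               2,5,7  hence i[1] = 3
    imap1 = [0,1]  (Set1's unique nonzeros are the 0th and 1st unique nonzeros of the merged matrix)
    imap2 = [0,2]  (Set2's unique nonzeros are the 0th and 2nd unique nonzeros of the merged matrix)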
6154 */
6155 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6156 {
6157 PetscInt r, m; /* r: row index; m: number of local rows of mat */
6158 PetscCount t, t1, t2, b1, e1, b2, e2;
6159
6160 PetscFunctionBegin;
6161 PetscCall(MatGetLocalSize(mat, &m, NULL));
6162 t1 = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6163 i[0] = 0;
6164 for (r = 0; r < m; r++) { /* Do row by row merging */
6165 b1 = rowBegin1[r];
6166 e1 = rowEnd1[r];
6167 b2 = rowBegin2[r];
6168 e2 = rowEnd2[r];
6169 while (b1 < e1 && b2 < e2) {
6170 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6171 j[t] = j1[b1];
6172 imap1[t1] = t;
6173 imap2[t2] = t;
6174 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6175 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6176 t1++;
6177 t2++;
6178 t++;
6179 } else if (j1[b1] < j2[b2]) {
6180 j[t] = j1[b1];
6181 imap1[t1] = t;
6182 b1 += jmap1[t1 + 1] - jmap1[t1];
6183 t1++;
6184 t++;
6185 } else {
6186 j[t] = j2[b2];
6187 imap2[t2] = t;
6188 b2 += jmap2[t2 + 1] - jmap2[t2];
6189 t2++;
6190 t++;
6191 }
6192 }
6193 /* Merge the remaining in either j1[] or j2[] */
6194 while (b1 < e1) {
6195 j[t] = j1[b1];
6196 imap1[t1] = t;
6197 b1 += jmap1[t1 + 1] - jmap1[t1];
6198 t1++;
6199 t++;
6200 }
6201 while (b2 < e2) {
6202 j[t] = j2[b2];
6203 imap2[t2] = t;
6204 b2 += jmap2[t2 + 1] - jmap2[t2];
6205 t2++;
6206 t++;
6207 }
6208 PetscCall(PetscIntCast(t, i + r + 1));
6209 }
6210 PetscFunctionReturn(PETSC_SUCCESS);
6211 }
6212
6213 /*
6214 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6215
6216 Input Parameters:
6217 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6218 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6219 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6220
6221 i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6222 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6223
6224 Output Parameters:
6225 j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6226 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6227 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6228 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6229
6230 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6231 Atot: number of entries belonging to the diagonal block.
6232 Annz: number of unique nonzeros belonging to the diagonal block.
6233 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6234 repeats (i.e., same 'i,j' pair).
6235 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t] 6236 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6237 6238 Atot: number of entries belonging to the diagonal block 6239 Annz: number of unique nonzeros belonging to the diagonal block. 6240 6241 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6242 6243 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6244 */ 6245 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6246 { 6247 PetscInt cstart, cend, rstart, rend, row, col; 6248 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6249 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6250 PetscCount k, m, p, q, r, s, mid; 6251 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6252 6253 PetscFunctionBegin; 6254 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6255 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6256 m = rend - rstart; 6257 6258 /* Skip negative rows */ 6259 for (k = 0; k < n; k++) 6260 if (i[k] >= 0) break; 6261 6262 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6263 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6264 */ 6265 while (k < n) { 6266 row = i[k]; 6267 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6268 for (s = k; s < n; s++) 6269 if (i[s] != row) break; 6270 6271 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6272 for (p = k; p < s; p++) { 6273 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6274 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6275 } 6276 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6277 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6278 rowBegin[row - rstart] = k; 6279 rowMid[row - rstart] = mid; 6280 rowEnd[row - rstart] = s; 6281 6282 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6283 Atot += mid - k; 6284 Btot += s - mid; 6285 6286 /* Count unique nonzeros of this diag row */ 6287 for (p = k; p < mid;) { 6288 col = j[p]; 6289 do { 6290 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6291 p++; 6292 } while (p < mid && j[p] == col); 6293 Annz++; 6294 } 6295 6296 /* Count unique nonzeros of this offdiag row */ 6297 for (p = mid; p < s;) { 6298 col = j[p]; 6299 do { 6300 p++; 6301 } while (p < s && j[p] == col); 6302 Bnnz++; 6303 } 6304 k = s; 6305 } 6306 6307 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6308 PetscCall(PetscMalloc1(Atot, &Aperm)); 6309 PetscCall(PetscMalloc1(Btot, &Bperm)); 6310 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6311 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6312 6313 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6314 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6315 for (r = 0; r < m; r++) { 6316 k = rowBegin[r]; 6317 mid 
= rowMid[r]; 6318 s = rowEnd[r]; 6319 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6320 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6321 Atot += mid - k; 6322 Btot += s - mid; 6323 6324 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6325 for (p = k; p < mid;) { 6326 col = j[p]; 6327 q = p; 6328 do { 6329 p++; 6330 } while (p < mid && j[p] == col); 6331 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6332 Annz++; 6333 } 6334 6335 for (p = mid; p < s;) { 6336 col = j[p]; 6337 q = p; 6338 do { 6339 p++; 6340 } while (p < s && j[p] == col); 6341 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6342 Bnnz++; 6343 } 6344 } 6345 /* Output */ 6346 *Aperm_ = Aperm; 6347 *Annz_ = Annz; 6348 *Atot_ = Atot; 6349 *Ajmap_ = Ajmap; 6350 *Bperm_ = Bperm; 6351 *Bnnz_ = Bnnz; 6352 *Btot_ = Btot; 6353 *Bjmap_ = Bjmap; 6354 PetscFunctionReturn(PETSC_SUCCESS); 6355 } 6356 6357 /* 6358 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6359 6360 Input Parameters: 6361 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6362 nnz: number of unique nonzeros in the merged matrix 6363 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6364 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6365 6366 Output Parameter: (memory is allocated by the caller) 6367 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6368 6369 Example: 6370 nnz1 = 4 6371 nnz = 6 6372 imap = [1,3,4,5] 6373 jmap = [0,3,5,6,7] 6374 then, 6375 jmap_new = [0,0,3,3,5,6,7] 6376 */ 6377 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6378 { 6379 PetscCount k, p; 6380 6381 PetscFunctionBegin; 6382 jmap_new[0] = 0; 6383 p = nnz; /* p loops over jmap_new[] backwards */ 6384 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6385 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6386 } 6387 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6388 PetscFunctionReturn(PETSC_SUCCESS); 6389 } 6390 6391 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6392 { 6393 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6394 6395 PetscFunctionBegin; 6396 PetscCall(PetscSFDestroy(&coo->sf)); 6397 PetscCall(PetscFree(coo->Aperm1)); 6398 PetscCall(PetscFree(coo->Bperm1)); 6399 PetscCall(PetscFree(coo->Ajmap1)); 6400 PetscCall(PetscFree(coo->Bjmap1)); 6401 PetscCall(PetscFree(coo->Aimap2)); 6402 PetscCall(PetscFree(coo->Bimap2)); 6403 PetscCall(PetscFree(coo->Aperm2)); 6404 PetscCall(PetscFree(coo->Bperm2)); 6405 PetscCall(PetscFree(coo->Ajmap2)); 6406 PetscCall(PetscFree(coo->Bjmap2)); 6407 PetscCall(PetscFree(coo->Cperm1)); 6408 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6409 PetscCall(PetscFree(coo)); 6410 PetscFunctionReturn(PETSC_SUCCESS); 6411 } 6412 6413 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6414 { 6415 MPI_Comm comm; 6416 PetscMPIInt rank, size; 6417 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6418 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6419 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6420 PetscContainer container; 6421 MatCOOStruct_MPIAIJ 
*coo; 6422 6423 PetscFunctionBegin; 6424 PetscCall(PetscFree(mpiaij->garray)); 6425 PetscCall(VecDestroy(&mpiaij->lvec)); 6426 #if defined(PETSC_USE_CTABLE) 6427 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6428 #else 6429 PetscCall(PetscFree(mpiaij->colmap)); 6430 #endif 6431 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6432 mat->assembled = PETSC_FALSE; 6433 mat->was_assembled = PETSC_FALSE; 6434 6435 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6436 PetscCallMPI(MPI_Comm_size(comm, &size)); 6437 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6438 PetscCall(PetscLayoutSetUp(mat->rmap)); 6439 PetscCall(PetscLayoutSetUp(mat->cmap)); 6440 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6441 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6442 PetscCall(MatGetLocalSize(mat, &m, &n)); 6443 PetscCall(MatGetSize(mat, &M, &N)); 6444 6445 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6446 /* entries come first, then local rows, then remote rows. */ 6447 PetscCount n1 = coo_n, *perm1; 6448 PetscInt *i1 = coo_i, *j1 = coo_j; 6449 6450 PetscCall(PetscMalloc1(n1, &perm1)); 6451 for (k = 0; k < n1; k++) perm1[k] = k; 6452 6453 /* Manipulate indices so that entries with negative row or col indices will have smallest 6454 row indices, local entries will have greater but negative row indices, and remote entries 6455 will have positive row indices. 6456 */ 6457 for (k = 0; k < n1; k++) { 6458 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6459 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6460 else { 6461 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6462 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6463 } 6464 } 6465 6466 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6467 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6468 6469 /* Advance k to the first entry we need to take care of */ 6470 for (k = 0; k < n1; k++) 6471 if (i1[k] > PETSC_INT_MIN) break; 6472 PetscCount i1start = k; 6473 6474 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6475 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6476 6477 /* Send remote rows to their owner */ 6478 /* Find which rows should be sent to which remote ranks*/ 6479 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6480 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6481 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6482 const PetscInt *ranges; 6483 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */
6484
6485 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6486 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6487 for (k = rem; k < n1;) {
6488 PetscMPIInt owner;
6489 PetscInt firstRow, lastRow;
6490
6491 /* Locate a row range */
6492 firstRow = i1[k]; /* first row of this owner */
6493 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6494 lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6495
6496 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6497 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6498
6499 /* All entries in [k,p) belong to this remote owner */
6500 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6501 PetscMPIInt *sendto2;
6502 PetscInt *nentries2;
6503 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6504
6505 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6506 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6507 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6508 PetscCall(PetscFree2(sendto, nentries));
6509 sendto = sendto2;
6510 nentries = nentries2;
6511 maxNsend = maxNsend2;
6512 }
6513 sendto[nsend] = owner;
6514 PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6515 nsend++;
6516 k = p;
6517 }
6518
6519 /* Build 1st SF to know offsets on remote to send data */
6520 PetscSF sf1;
6521 PetscInt nroots = 1, nroots2 = 0;
6522 PetscInt nleaves = nsend, nleaves2 = 0;
6523 PetscInt *offsets;
6524 PetscSFNode *iremote;
6525
6526 PetscCall(PetscSFCreate(comm, &sf1));
6527 PetscCall(PetscMalloc1(nsend, &iremote));
6528 PetscCall(PetscMalloc1(nsend, &offsets));
6529 for (k = 0; k < nsend; k++) {
6530 iremote[k].rank = sendto[k];
6531 iremote[k].index = 0;
6532 nleaves2 += nentries[k];
6533 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6534 }
6535 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6536 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6537 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we catch it via the offsets[] check below */
6538 PetscCall(PetscSFDestroy(&sf1));
6539 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6540
6541 /* Build 2nd SF to send remote COOs to their owner */
6542 PetscSF sf2;
6543 nroots = nroots2;
6544 nleaves = nleaves2;
6545 PetscCall(PetscSFCreate(comm, &sf2));
6546 PetscCall(PetscSFSetFromOptions(sf2));
6547 PetscCall(PetscMalloc1(nleaves, &iremote));
6548 p = 0;
6549 for (k = 0; k < nsend; k++) {
6550 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6551 for (q = 0; q < nentries[k]; q++, p++) {
6552 iremote[p].rank = sendto[k];
6553 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6554 }
6555 }
6556 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6557
6558 /* Send the remote COOs to their owner */
6559 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6560
PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6561 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6562 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6563 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6564 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6565 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6566 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6567 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6568 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6569 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6570 6571 PetscCall(PetscFree(offsets)); 6572 PetscCall(PetscFree2(sendto, nentries)); 6573 6574 /* Sort received COOs by row along with the permutation array */ 6575 for (k = 0; k < n2; k++) perm2[k] = k; 6576 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6577 6578 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6579 PetscCount *Cperm1; 6580 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6581 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6582 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6583 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6584 6585 /* Support for HYPRE matrices, kind of a hack. 6586 Swap min column with diagonal so that diagonal values will go first */ 6587 PetscBool hypre; 6588 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6589 if (hypre) { 6590 PetscInt *minj; 6591 PetscBT hasdiag; 6592 6593 PetscCall(PetscBTCreate(m, &hasdiag)); 6594 PetscCall(PetscMalloc1(m, &minj)); 6595 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6596 for (k = i1start; k < rem; k++) { 6597 if (j1[k] < cstart || j1[k] >= cend) continue; 6598 const PetscInt rindex = i1[k] - rstart; 6599 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6600 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6601 } 6602 for (k = 0; k < n2; k++) { 6603 if (j2[k] < cstart || j2[k] >= cend) continue; 6604 const PetscInt rindex = i2[k] - rstart; 6605 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6606 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6607 } 6608 for (k = i1start; k < rem; k++) { 6609 const PetscInt rindex = i1[k] - rstart; 6610 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6611 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6612 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6613 } 6614 for (k = 0; k < n2; k++) { 6615 const PetscInt rindex = i2[k] - rstart; 6616 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6617 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6618 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6619 } 6620 PetscCall(PetscBTDestroy(&hasdiag)); 6621 PetscCall(PetscFree(minj)); 6622 } 6623 6624 /* Split local COOs and received COOs into diag/offdiag portions */ 6625 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6626 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6627 PetscCount 
Annz1, Bnnz1, Atot1, Btot1; 6628 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6629 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6630 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6631 6632 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6633 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6634 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6635 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6636 6637 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6638 PetscInt *Ai, *Bi; 6639 PetscInt *Aj, *Bj; 6640 6641 PetscCall(PetscMalloc1(m + 1, &Ai)); 6642 PetscCall(PetscMalloc1(m + 1, &Bi)); 6643 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6644 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6645 6646 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6647 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6648 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6649 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6650 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6651 6652 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6653 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6654 6655 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6656 /* expect nonzeros in A/B most likely have local contributing entries */ 6657 PetscInt Annz = Ai[m]; 6658 PetscInt Bnnz = Bi[m]; 6659 PetscCount *Ajmap1_new, *Bjmap1_new; 6660 6661 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6662 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6663 6664 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6665 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6666 6667 PetscCall(PetscFree(Aimap1)); 6668 PetscCall(PetscFree(Ajmap1)); 6669 PetscCall(PetscFree(Bimap1)); 6670 PetscCall(PetscFree(Bjmap1)); 6671 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6672 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6673 PetscCall(PetscFree(perm1)); 6674 PetscCall(PetscFree3(i2, j2, perm2)); 6675 6676 Ajmap1 = Ajmap1_new; 6677 Bjmap1 = Bjmap1_new; 6678 6679 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6680 if (Annz < Annz1 + Annz2) { 6681 PetscInt *Aj_new; 6682 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6683 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6684 PetscCall(PetscFree(Aj)); 6685 Aj = Aj_new; 6686 } 6687 6688 if (Bnnz < Bnnz1 + Bnnz2) { 6689 PetscInt *Bj_new; 6690 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6691 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6692 PetscCall(PetscFree(Bj)); 6693 Bj = Bj_new; 6694 } 6695 6696 /* Create new submatrices for on-process and off-process coupling */ 6697 PetscScalar *Aa, *Ba; 6698 MatType rtype; 6699 Mat_SeqAIJ *a, *b; 6700 PetscObjectState state; 6701 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6702 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6703 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6704 if (cstart) { 6705 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6706 } 6707 6708 PetscCall(MatGetRootType_Private(mat, &rtype)); 6709 6710 
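  /* At this point Ai/Aj and Bi/Bj hold the merged CSR patterns of the diagonal and off-diagonal blocks, and the
     jmap/imap/perm arrays record how every original COO entry (local or received) folds into those patterns, so that
     MatSetValuesCOO_MPIAIJ() can later accumulate repeated entries in a single pass. A hedged sketch of the user-facing
     COO interface this routine implements (values are illustrative only):
       PetscInt    coo_i[] = {0, 0, 1};  // global row indices; repeated (i,j) pairs are allowed and are summed
       PetscInt    coo_j[] = {0, 2, 1};  // global column indices
       PetscScalar v[]     = {1.0, 2.0, 3.0};
       PetscCall(MatSetPreallocationCOO(A, 3, coo_i, coo_j)); // dispatches here for MATMPIAIJ
       PetscCall(MatSetValuesCOO(A, v, ADD_VALUES));          // consumes the structures assembled below
  */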
MatSeqXAIJGetOptions_Private(mpiaij->A); 6711 PetscCall(MatDestroy(&mpiaij->A)); 6712 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6713 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6714 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6715 6716 MatSeqXAIJGetOptions_Private(mpiaij->B); 6717 PetscCall(MatDestroy(&mpiaij->B)); 6718 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6719 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6720 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6721 6722 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6723 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6724 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6725 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6726 6727 a = (Mat_SeqAIJ *)mpiaij->A->data; 6728 b = (Mat_SeqAIJ *)mpiaij->B->data; 6729 a->free_a = PETSC_TRUE; 6730 a->free_ij = PETSC_TRUE; 6731 b->free_a = PETSC_TRUE; 6732 b->free_ij = PETSC_TRUE; 6733 a->maxnz = a->nz; 6734 b->maxnz = b->nz; 6735 6736 /* conversion must happen AFTER multiply setup */ 6737 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6738 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6739 PetscCall(VecDestroy(&mpiaij->lvec)); 6740 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6741 6742 // Put the COO struct in a container and then attach that to the matrix 6743 PetscCall(PetscMalloc1(1, &coo)); 6744 coo->n = coo_n; 6745 coo->sf = sf2; 6746 coo->sendlen = nleaves; 6747 coo->recvlen = nroots; 6748 coo->Annz = Annz; 6749 coo->Bnnz = Bnnz; 6750 coo->Annz2 = Annz2; 6751 coo->Bnnz2 = Bnnz2; 6752 coo->Atot1 = Atot1; 6753 coo->Atot2 = Atot2; 6754 coo->Btot1 = Btot1; 6755 coo->Btot2 = Btot2; 6756 coo->Ajmap1 = Ajmap1; 6757 coo->Aperm1 = Aperm1; 6758 coo->Bjmap1 = Bjmap1; 6759 coo->Bperm1 = Bperm1; 6760 coo->Aimap2 = Aimap2; 6761 coo->Ajmap2 = Ajmap2; 6762 coo->Aperm2 = Aperm2; 6763 coo->Bimap2 = Bimap2; 6764 coo->Bjmap2 = Bjmap2; 6765 coo->Bperm2 = Bperm2; 6766 coo->Cperm1 = Cperm1; 6767 // Allocate in preallocation. 
If not used, it has zero cost on host 6768 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6769 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6770 PetscCall(PetscContainerSetPointer(container, coo)); 6771 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6772 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6773 PetscCall(PetscContainerDestroy(&container)); 6774 PetscFunctionReturn(PETSC_SUCCESS); 6775 } 6776 6777 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6778 { 6779 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6780 Mat A = mpiaij->A, B = mpiaij->B; 6781 PetscScalar *Aa, *Ba; 6782 PetscScalar *sendbuf, *recvbuf; 6783 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6784 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6785 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6786 const PetscCount *Cperm1; 6787 PetscContainer container; 6788 MatCOOStruct_MPIAIJ *coo; 6789 6790 PetscFunctionBegin; 6791 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6792 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6793 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6794 sendbuf = coo->sendbuf; 6795 recvbuf = coo->recvbuf; 6796 Ajmap1 = coo->Ajmap1; 6797 Ajmap2 = coo->Ajmap2; 6798 Aimap2 = coo->Aimap2; 6799 Bjmap1 = coo->Bjmap1; 6800 Bjmap2 = coo->Bjmap2; 6801 Bimap2 = coo->Bimap2; 6802 Aperm1 = coo->Aperm1; 6803 Aperm2 = coo->Aperm2; 6804 Bperm1 = coo->Bperm1; 6805 Bperm2 = coo->Bperm2; 6806 Cperm1 = coo->Cperm1; 6807 6808 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6809 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6810 6811 /* Pack entries to be sent to remote */ 6812 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6813 6814 /* Send remote entries to their owner and overlap the communication with local computation */ 6815 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6816 /* Add local entries to A and B */ 6817 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6818 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6819 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6820 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6821 } 6822 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6823 PetscScalar sum = 0.0; 6824 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6825 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6826 } 6827 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6828 6829 /* Add received remote entries to A and B */ 6830 for (PetscCount i = 0; i < coo->Annz2; i++) { 6831 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6832 } 6833 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6834 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6835 } 6836 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6837 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6838 PetscFunctionReturn(PETSC_SUCCESS); 6839 } 6840 6841 /*MC 6842 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6843 6844 Options Database Keys: 6845 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6846 6847 Level: beginner 6848 6849 Notes: 6850 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6851 in this case the values associated with the rows and columns one passes in are set to zero 6852 in the matrix 6853 6854 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6855 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6856 6857 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6858 M*/ 6859 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6860 { 6861 Mat_MPIAIJ *b; 6862 PetscMPIInt size; 6863 6864 PetscFunctionBegin; 6865 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6866 6867 PetscCall(PetscNew(&b)); 6868 B->data = (void *)b; 6869 B->ops[0] = MatOps_Values; 6870 B->assembled = PETSC_FALSE; 6871 B->insertmode = NOT_SET_VALUES; 6872 b->size = size; 6873 6874 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6875 6876 /* build cache for off array entries formed */ 6877 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6878 6879 b->donotstash = PETSC_FALSE; 6880 b->colmap = NULL; 6881 b->garray = NULL; 6882 b->roworiented = PETSC_TRUE; 6883 6884 /* stuff used for matrix vector multiply */ 6885 b->lvec = NULL; 6886 b->Mvctx = NULL; 6887 6888 /* stuff for MatGetRow() */ 6889 b->rowindices = NULL; 6890 b->rowvalues = NULL; 6891 b->getrowactive = PETSC_FALSE; 6892 6893 /* flexible pointer used in CUSPARSE classes */ 6894 b->spptr = NULL; 6895 6896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", 
MatConvert_MPIAIJ_MPIAIJPERM)); 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6906 #if defined(PETSC_HAVE_CUDA) 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6908 #endif 6909 #if defined(PETSC_HAVE_HIP) 6910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6911 #endif 6912 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6913 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6914 #endif 6915 #if defined(PETSC_HAVE_MKL_SPARSE) 6916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6917 #endif 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6922 #if defined(PETSC_HAVE_ELEMENTAL) 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6924 #endif 6925 #if defined(PETSC_HAVE_SCALAPACK) 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6927 #endif 6928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6930 #if defined(PETSC_HAVE_HYPRE) 6931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6933 #endif 6934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6938 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6939 PetscFunctionReturn(PETSC_SUCCESS); 6940 } 6941 6942 /*@ 6943 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6944 and "off-diagonal" part of the matrix in CSR format. 6945 6946 Collective 6947 6948 Input Parameters: 6949 + comm - MPI communicator 6950 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6951 . n - This value should be the same as the local size used in creating the 6952 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6953 calculated if `N` is given) For square matrices `n` is almost always `m`. 6954 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6955 . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6956 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6957 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6958 . a - matrix values 6959 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6960 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6961 - oa - matrix values 6962 6963 Output Parameter: 6964 . mat - the matrix 6965 6966 Level: advanced 6967 6968 Notes: 6969 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6970 must free the arrays once the matrix has been destroyed and not before. 6971 6972 The `i` and `j` indices are 0 based 6973 6974 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6975 6976 This sets local rows and cannot be used to set off-processor values. 6977 6978 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6979 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6980 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6981 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6982 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6983 communication if it is known that only local entries will be set. 
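   Example Usage:
   A minimal sketch (assuming exactly two MPI ranks) that builds the 4x4 matrix 2*I, where each rank owns two rows, the
   "diagonal" block is 2*I of size 2, and the "off-diagonal" block is empty; the arrays are static because they are not copied:
.vb
   static PetscInt    i[]  = {0, 1, 2}, j[] = {0, 1};   // diagonal block: local column indices
   static PetscScalar a[]  = {2.0, 2.0};
   static PetscInt    oi[] = {0, 0, 0}, oj[] = {0};     // empty off-diagonal block; oj/oa are unread placeholders
   static PetscScalar oa[] = {0.0};
   Mat                A;

   PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &A));
.ve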
6984 6985 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6986 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6987 @*/ 6988 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6989 { 6990 Mat_MPIAIJ *maij; 6991 6992 PetscFunctionBegin; 6993 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6994 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6995 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6996 PetscCall(MatCreate(comm, mat)); 6997 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6998 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6999 maij = (Mat_MPIAIJ *)(*mat)->data; 7000 7001 (*mat)->preallocated = PETSC_TRUE; 7002 7003 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 7004 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 7005 7006 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 7007 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7008 7009 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7010 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7011 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7012 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7013 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7014 PetscFunctionReturn(PETSC_SUCCESS); 7015 } 7016 7017 typedef struct { 7018 Mat *mp; /* intermediate products */ 7019 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7020 PetscInt cp; /* number of intermediate products */ 7021 7022 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7023 PetscInt *startsj_s, *startsj_r; 7024 PetscScalar *bufa; 7025 Mat P_oth; 7026 7027 /* may take advantage of merging product->B */ 7028 Mat Bloc; /* B-local by merging diag and off-diag */ 7029 7030 /* cusparse does not have support to split between symbolic and numeric phases. 7031 When api_user is true, we don't need to update the numerical values 7032 of the temporary storage */ 7033 PetscBool reusesym; 7034 7035 /* support for COO values insertion */ 7036 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7037 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7038 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7039 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7040 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7041 PetscMemType mtype; 7042 7043 /* customization */ 7044 PetscBool abmerge; 7045 PetscBool P_oth_bind; 7046 } MatMatMPIAIJBACKEND; 7047 7048 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7049 { 7050 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7051 PetscInt i; 7052 7053 PetscFunctionBegin; 7054 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7055 PetscCall(PetscFree(mmdata->bufa)); 7056 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7057 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7058 PetscCall(MatDestroy(&mmdata->P_oth)); 7059 PetscCall(MatDestroy(&mmdata->Bloc)); 7060 PetscCall(PetscSFDestroy(&mmdata->sf)); 7061 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7062 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7063 PetscCall(PetscFree(mmdata->own[0])); 7064 PetscCall(PetscFree(mmdata->own)); 7065 PetscCall(PetscFree(mmdata->off[0])); 7066 PetscCall(PetscFree(mmdata->off)); 7067 PetscCall(PetscFree(mmdata)); 7068 PetscFunctionReturn(PETSC_SUCCESS); 7069 } 7070 7071 /* Copy selected n entries with indices in idx[] of A to v[]. 7072 If idx is NULL, copy the whole data array of A to v[] 7073 */ 7074 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7075 { 7076 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7077 7078 PetscFunctionBegin; 7079 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7080 if (f) { 7081 PetscCall((*f)(A, n, idx, v)); 7082 } else { 7083 const PetscScalar *vv; 7084 7085 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7086 if (n && idx) { 7087 PetscScalar *w = v; 7088 const PetscInt *oi = idx; 7089 PetscInt j; 7090 7091 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7092 } else { 7093 PetscCall(PetscArraycpy(v, vv, n)); 7094 } 7095 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7096 } 7097 PetscFunctionReturn(PETSC_SUCCESS); 7098 } 7099 7100 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7101 { 7102 MatMatMPIAIJBACKEND *mmdata; 7103 PetscInt i, n_d, n_o; 7104 7105 PetscFunctionBegin; 7106 MatCheckProduct(C, 1); 7107 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7108 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7109 if (!mmdata->reusesym) { /* update temporary matrices */ 7110 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7111 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7112 } 7113 mmdata->reusesym = PETSC_FALSE; 7114 7115 for (i = 0; i < mmdata->cp; i++) { 7116 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7117 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7118 } 7119 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7120 PetscInt noff; 7121 7122 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7123 if (mmdata->mptmp[i]) continue; 7124 if (noff) { 7125 PetscInt nown; 7126 7127 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7128 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], 
mmdata->coo_w + n_o)); 7129 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7130 n_o += noff; 7131 n_d += nown; 7132 } else { 7133 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7134 7135 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7136 n_d += mm->nz; 7137 } 7138 } 7139 if (mmdata->hasoffproc) { /* offprocess insertion */ 7140 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7141 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7142 } 7143 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7144 PetscFunctionReturn(PETSC_SUCCESS); 7145 } 7146 7147 /* Support for Pt * A, A * P, or Pt * A * P */ 7148 #define MAX_NUMBER_INTERMEDIATE 4 7149 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7150 { 7151 Mat_Product *product = C->product; 7152 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7153 Mat_MPIAIJ *a, *p; 7154 MatMatMPIAIJBACKEND *mmdata; 7155 ISLocalToGlobalMapping P_oth_l2g = NULL; 7156 IS glob = NULL; 7157 const char *prefix; 7158 char pprefix[256]; 7159 const PetscInt *globidx, *P_oth_idx; 7160 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7161 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7162 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7163 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7164 /* a base offset; type-2: sparse with a local to global map table */ 7165 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7166 7167 MatProductType ptype; 7168 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7169 PetscMPIInt size; 7170 7171 PetscFunctionBegin; 7172 MatCheckProduct(C, 1); 7173 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7174 ptype = product->type; 7175 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7176 ptype = MATPRODUCT_AB; 7177 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7178 } 7179 switch (ptype) { 7180 case MATPRODUCT_AB: 7181 A = product->A; 7182 P = product->B; 7183 m = A->rmap->n; 7184 n = P->cmap->n; 7185 M = A->rmap->N; 7186 N = P->cmap->N; 7187 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7188 break; 7189 case MATPRODUCT_AtB: 7190 P = product->A; 7191 A = product->B; 7192 m = P->cmap->n; 7193 n = A->cmap->n; 7194 M = P->cmap->N; 7195 N = A->cmap->N; 7196 hasoffproc = PETSC_TRUE; 7197 break; 7198 case MATPRODUCT_PtAP: 7199 A = product->A; 7200 P = product->B; 7201 m = P->cmap->n; 7202 n = P->cmap->n; 7203 M = P->cmap->N; 7204 N = P->cmap->N; 7205 hasoffproc = PETSC_TRUE; 7206 break; 7207 default: 7208 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7209 } 7210 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7211 if (size == 1) hasoffproc = PETSC_FALSE; 7212 7213 /* defaults */ 7214 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7215 mp[i] = NULL; 7216 mptmp[i] = PETSC_FALSE; 7217 rmapt[i] = -1; 7218 cmapt[i] = -1; 7219 rmapa[i] = NULL; 7220 cmapa[i] = NULL; 7221 } 7222 7223 /* customization */ 7224 PetscCall(PetscNew(&mmdata)); 7225 mmdata->reusesym = product->api_user; 7226 if (ptype == 
MATPRODUCT_AB) { 7227 if (product->api_user) { 7228 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7229 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7230 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7231 PetscOptionsEnd(); 7232 } else { 7233 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7234 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7235 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7236 PetscOptionsEnd(); 7237 } 7238 } else if (ptype == MATPRODUCT_PtAP) { 7239 if (product->api_user) { 7240 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7241 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7242 PetscOptionsEnd(); 7243 } else { 7244 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7245 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7246 PetscOptionsEnd(); 7247 } 7248 } 7249 a = (Mat_MPIAIJ *)A->data; 7250 p = (Mat_MPIAIJ *)P->data; 7251 PetscCall(MatSetSizes(C, m, n, M, N)); 7252 PetscCall(PetscLayoutSetUp(C->rmap)); 7253 PetscCall(PetscLayoutSetUp(C->cmap)); 7254 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7255 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7256 7257 cp = 0; 7258 switch (ptype) { 7259 case MATPRODUCT_AB: /* A * P */ 7260 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7261 7262 /* A_diag * P_local (merged or not) */ 7263 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7264 /* P is product->B */ 7265 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7266 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7267 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7268 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7269 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7270 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7271 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7272 mp[cp]->product->api_user = product->api_user; 7273 PetscCall(MatProductSetFromOptions(mp[cp])); 7274 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7275 PetscCall(ISGetIndices(glob, &globidx)); 7276 rmapt[cp] = 1; 7277 cmapt[cp] = 2; 7278 cmapa[cp] = globidx; 7279 mptmp[cp] = PETSC_FALSE; 7280 cp++; 7281 } else { /* A_diag * P_diag and A_diag * P_off */ 7282 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7283 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7284 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7285 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7286 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7287 PetscCall(MatAppendOptionsPrefix(mp[cp], 
pprefix)); 7288 mp[cp]->product->api_user = product->api_user; 7289 PetscCall(MatProductSetFromOptions(mp[cp])); 7290 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7291 rmapt[cp] = 1; 7292 cmapt[cp] = 1; 7293 mptmp[cp] = PETSC_FALSE; 7294 cp++; 7295 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7296 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7297 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7298 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7299 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7300 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7301 mp[cp]->product->api_user = product->api_user; 7302 PetscCall(MatProductSetFromOptions(mp[cp])); 7303 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7304 rmapt[cp] = 1; 7305 cmapt[cp] = 2; 7306 cmapa[cp] = p->garray; 7307 mptmp[cp] = PETSC_FALSE; 7308 cp++; 7309 } 7310 7311 /* A_off * P_other */ 7312 if (mmdata->P_oth) { 7313 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7314 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7315 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7316 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7317 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7318 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7319 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7320 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7321 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7322 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7323 mp[cp]->product->api_user = product->api_user; 7324 PetscCall(MatProductSetFromOptions(mp[cp])); 7325 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7326 rmapt[cp] = 1; 7327 cmapt[cp] = 2; 7328 cmapa[cp] = P_oth_idx; 7329 mptmp[cp] = PETSC_FALSE; 7330 cp++; 7331 } 7332 break; 7333 7334 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7335 /* A is product->B */ 7336 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7337 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7338 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7339 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7340 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7341 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7342 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7343 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7344 mp[cp]->product->api_user = product->api_user; 7345 PetscCall(MatProductSetFromOptions(mp[cp])); 7346 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7347 PetscCall(ISGetIndices(glob, &globidx)); 7348 rmapt[cp] = 2; 7349 rmapa[cp] = globidx; 7350 cmapt[cp] = 2; 7351 cmapa[cp] = globidx; 7352 mptmp[cp] = PETSC_FALSE; 7353 cp++; 7354 } else { 7355 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7356 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7357 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7358 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7359 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7360 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7361 mp[cp]->product->api_user = product->api_user; 7362 PetscCall(MatProductSetFromOptions(mp[cp])); 7363 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 
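        /* This first product contributes P_diag^T * A_loc: its rows are the locally owned columns of P,
           i.e. consecutive locally owned rows of C (row map type-1 recorded below), while its columns follow
           the merged local column space of A and are mapped to global column ids of C via glob (type-2) */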
7364 PetscCall(ISGetIndices(glob, &globidx)); 7365 rmapt[cp] = 1; 7366 cmapt[cp] = 2; 7367 cmapa[cp] = globidx; 7368 mptmp[cp] = PETSC_FALSE; 7369 cp++; 7370 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7371 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7372 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7373 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7374 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7375 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7376 mp[cp]->product->api_user = product->api_user; 7377 PetscCall(MatProductSetFromOptions(mp[cp])); 7378 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7379 rmapt[cp] = 2; 7380 rmapa[cp] = p->garray; 7381 cmapt[cp] = 2; 7382 cmapa[cp] = globidx; 7383 mptmp[cp] = PETSC_FALSE; 7384 cp++; 7385 } 7386 break; 7387 case MATPRODUCT_PtAP: 7388 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7389 /* P is product->B */ 7390 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7391 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7392 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7393 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7394 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7395 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7396 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7397 mp[cp]->product->api_user = product->api_user; 7398 PetscCall(MatProductSetFromOptions(mp[cp])); 7399 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7400 PetscCall(ISGetIndices(glob, &globidx)); 7401 rmapt[cp] = 2; 7402 rmapa[cp] = globidx; 7403 cmapt[cp] = 2; 7404 cmapa[cp] = globidx; 7405 mptmp[cp] = PETSC_FALSE; 7406 cp++; 7407 if (mmdata->P_oth) { 7408 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7409 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7410 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7411 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7412 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7413 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7414 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7415 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7416 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7417 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7418 mp[cp]->product->api_user = product->api_user; 7419 PetscCall(MatProductSetFromOptions(mp[cp])); 7420 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7421 mptmp[cp] = PETSC_TRUE; 7422 cp++; 7423 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7424 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7425 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7426 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7427 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7428 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7429 mp[cp]->product->api_user = product->api_user; 7430 PetscCall(MatProductSetFromOptions(mp[cp])); 7431 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7432 rmapt[cp] = 2; 7433 rmapa[cp] = globidx; 7434 cmapt[cp] = 2; 7435 cmapa[cp] = P_oth_idx; 7436 mptmp[cp] = PETSC_FALSE; 7437 cp++; 7438 } 7439 break; 7440 default: 7441 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, 
"Not for product type %s", MatProductTypes[ptype]); 7442 } 7443 /* sanity check */ 7444 if (size > 1) 7445 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7446 7447 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7448 for (i = 0; i < cp; i++) { 7449 mmdata->mp[i] = mp[i]; 7450 mmdata->mptmp[i] = mptmp[i]; 7451 } 7452 mmdata->cp = cp; 7453 C->product->data = mmdata; 7454 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7455 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7456 7457 /* memory type */ 7458 mmdata->mtype = PETSC_MEMTYPE_HOST; 7459 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7460 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7461 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7462 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7463 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7464 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7465 7466 /* prepare coo coordinates for values insertion */ 7467 7468 /* count total nonzeros of those intermediate seqaij Mats 7469 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7470 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7471 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7472 */ 7473 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7474 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7475 if (mptmp[cp]) continue; 7476 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7477 const PetscInt *rmap = rmapa[cp]; 7478 const PetscInt mr = mp[cp]->rmap->n; 7479 const PetscInt rs = C->rmap->rstart; 7480 const PetscInt re = C->rmap->rend; 7481 const PetscInt *ii = mm->i; 7482 for (i = 0; i < mr; i++) { 7483 const PetscInt gr = rmap[i]; 7484 const PetscInt nz = ii[i + 1] - ii[i]; 7485 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7486 else ncoo_oown += nz; /* this row is local */ 7487 } 7488 } else ncoo_d += mm->nz; 7489 } 7490 7491 /* 7492 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7493 7494 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7495 7496 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7497 7498 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7499 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7500 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7501 7502 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7503 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 
7504 */ 7505 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7506 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7507 7508 /* gather (i,j) of nonzeros inserted by remote procs */ 7509 if (hasoffproc) { 7510 PetscSF msf; 7511 PetscInt ncoo2, *coo_i2, *coo_j2; 7512 7513 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7514 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7515 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7516 7517 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7518 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7519 PetscInt *idxoff = mmdata->off[cp]; 7520 PetscInt *idxown = mmdata->own[cp]; 7521 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7522 const PetscInt *rmap = rmapa[cp]; 7523 const PetscInt *cmap = cmapa[cp]; 7524 const PetscInt *ii = mm->i; 7525 PetscInt *coi = coo_i + ncoo_o; 7526 PetscInt *coj = coo_j + ncoo_o; 7527 const PetscInt mr = mp[cp]->rmap->n; 7528 const PetscInt rs = C->rmap->rstart; 7529 const PetscInt re = C->rmap->rend; 7530 const PetscInt cs = C->cmap->rstart; 7531 for (i = 0; i < mr; i++) { 7532 const PetscInt *jj = mm->j + ii[i]; 7533 const PetscInt gr = rmap[i]; 7534 const PetscInt nz = ii[i + 1] - ii[i]; 7535 if (gr < rs || gr >= re) { /* this is an offproc row */ 7536 for (j = ii[i]; j < ii[i + 1]; j++) { 7537 *coi++ = gr; 7538 *idxoff++ = j; 7539 } 7540 if (!cmapt[cp]) { /* already global */ 7541 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7542 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7543 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7544 } else { /* offdiag */ 7545 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7546 } 7547 ncoo_o += nz; 7548 } else { /* this is a local row */ 7549 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7550 } 7551 } 7552 } 7553 mmdata->off[cp + 1] = idxoff; 7554 mmdata->own[cp + 1] = idxown; 7555 } 7556 7557 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7558 PetscInt incoo_o; 7559 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7560 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7561 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7562 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7563 ncoo = ncoo_d + ncoo_oown + ncoo2; 7564 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7565 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7566 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7567 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7568 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7569 PetscCall(PetscFree2(coo_i, coo_j)); 7570 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7571 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7572 coo_i = coo_i2; 7573 coo_j = coo_j2; 7574 } else { /* no offproc values insertion */ 7575 ncoo = ncoo_d; 7576 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7577 7578 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7579 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7580 PetscCall(PetscSFSetUp(mmdata->sf)); 7581 } 7582 
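  /* An (empty) SF is created above even when there is no off-process insertion, so that coo_v can be
     allocated below with PetscSFMalloc() on the matching memory type and later released with
     PetscSFFree() in MatDestroy_MatMatMPIAIJBACKEND() */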
mmdata->hasoffproc = hasoffproc; 7583 7584 /* gather (i,j) of nonzeros inserted locally */ 7585 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7586 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7587 PetscInt *coi = coo_i + ncoo_d; 7588 PetscInt *coj = coo_j + ncoo_d; 7589 const PetscInt *jj = mm->j; 7590 const PetscInt *ii = mm->i; 7591 const PetscInt *cmap = cmapa[cp]; 7592 const PetscInt *rmap = rmapa[cp]; 7593 const PetscInt mr = mp[cp]->rmap->n; 7594 const PetscInt rs = C->rmap->rstart; 7595 const PetscInt re = C->rmap->rend; 7596 const PetscInt cs = C->cmap->rstart; 7597 7598 if (mptmp[cp]) continue; 7599 if (rmapt[cp] == 1) { /* consecutive rows */ 7600 /* fill coo_i */ 7601 for (i = 0; i < mr; i++) { 7602 const PetscInt gr = i + rs; 7603 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7604 } 7605 /* fill coo_j */ 7606 if (!cmapt[cp]) { /* type-0, already global */ 7607 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7608 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7609 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7610 } else { /* type-2, local to global for sparse columns */ 7611 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7612 } 7613 ncoo_d += mm->nz; 7614 } else if (rmapt[cp] == 2) { /* sparse rows */ 7615 for (i = 0; i < mr; i++) { 7616 const PetscInt *jj = mm->j + ii[i]; 7617 const PetscInt gr = rmap[i]; 7618 const PetscInt nz = ii[i + 1] - ii[i]; 7619 if (gr >= rs && gr < re) { /* local rows */ 7620 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7621 if (!cmapt[cp]) { /* type-0, already global */ 7622 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7623 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7624 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7625 } else { /* type-2, local to global for sparse columns */ 7626 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7627 } 7628 ncoo_d += nz; 7629 } 7630 } 7631 } 7632 } 7633 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7634 PetscCall(ISDestroy(&glob)); 7635 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7636 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7637 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7638 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7639 7640 /* preallocate with COO data */ 7641 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7642 PetscCall(PetscFree2(coo_i, coo_j)); 7643 PetscFunctionReturn(PETSC_SUCCESS); 7644 } 7645 7646 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7647 { 7648 Mat_Product *product = mat->product; 7649 #if defined(PETSC_HAVE_DEVICE) 7650 PetscBool match = PETSC_FALSE; 7651 PetscBool usecpu = PETSC_FALSE; 7652 #else 7653 PetscBool match = PETSC_TRUE; 7654 #endif 7655 7656 PetscFunctionBegin; 7657 MatCheckProduct(mat, 1); 7658 #if defined(PETSC_HAVE_DEVICE) 7659 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7660 if (match) { /* we can always fallback to the CPU if requested */ 7661 switch (product->type) { 7662 case MATPRODUCT_AB: 7663 if (product->api_user) { 7664 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7665 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7666 
PetscOptionsEnd(); 7667 } else { 7668 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7669 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7670 PetscOptionsEnd(); 7671 } 7672 break; 7673 case MATPRODUCT_AtB: 7674 if (product->api_user) { 7675 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7676 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7677 PetscOptionsEnd(); 7678 } else { 7679 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7680 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7681 PetscOptionsEnd(); 7682 } 7683 break; 7684 case MATPRODUCT_PtAP: 7685 if (product->api_user) { 7686 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7687 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7688 PetscOptionsEnd(); 7689 } else { 7690 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7691 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7692 PetscOptionsEnd(); 7693 } 7694 break; 7695 default: 7696 break; 7697 } 7698 match = (PetscBool)!usecpu; 7699 } 7700 #endif 7701 if (match) { 7702 switch (product->type) { 7703 case MATPRODUCT_AB: 7704 case MATPRODUCT_AtB: 7705 case MATPRODUCT_PtAP: 7706 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7707 break; 7708 default: 7709 break; 7710 } 7711 } 7712 /* fallback to MPIAIJ ops */ 7713 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7714 PetscFunctionReturn(PETSC_SUCCESS); 7715 } 7716 7717 /* 7718 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7719 7720 n - the number of block indices in cc[] 7721 cc - the block indices (must be large enough to contain the indices) 7722 */ 7723 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7724 { 7725 PetscInt cnt = -1, nidx, j; 7726 const PetscInt *idx; 7727 7728 PetscFunctionBegin; 7729 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7730 if (nidx) { 7731 cnt = 0; 7732 cc[cnt] = idx[0] / bs; 7733 for (j = 1; j < nidx; j++) { 7734 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7735 } 7736 } 7737 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7738 *n = cnt + 1; 7739 PetscFunctionReturn(PETSC_SUCCESS); 7740 } 7741 7742 /* 7743 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7744 7745 ncollapsed - the number of block indices 7746 collapsed - the block indices (must be large enough to contain the indices) 7747 */ 7748 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7749 { 7750 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7751 7752 PetscFunctionBegin; 7753 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7754 for (i = start + 1; i < start + bs; i++) { 7755 
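    /* collapse row i to its block column indices, merge them into the running set, and swap the
       work arrays so that cprev always holds the merged result accumulated so far */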
PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7756 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7757 cprevtmp = cprev; 7758 cprev = merged; 7759 merged = cprevtmp; 7760 } 7761 *ncollapsed = nprev; 7762 if (collapsed) *collapsed = cprev; 7763 PetscFunctionReturn(PETSC_SUCCESS); 7764 } 7765 7766 /* 7767 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7768 7769 Input Parameter: 7770 . Amat - matrix 7771 - symmetrize - make the result symmetric 7772 + scale - scale with diagonal 7773 7774 Output Parameter: 7775 . a_Gmat - output scalar graph >= 0 7776 7777 */ 7778 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7779 { 7780 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7781 MPI_Comm comm; 7782 Mat Gmat; 7783 PetscBool ismpiaij, isseqaij; 7784 Mat a, b, c; 7785 MatType jtype; 7786 7787 PetscFunctionBegin; 7788 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7789 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7790 PetscCall(MatGetSize(Amat, &MM, &NN)); 7791 PetscCall(MatGetBlockSize(Amat, &bs)); 7792 nloc = (Iend - Istart) / bs; 7793 7794 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7795 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7796 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7797 7798 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7799 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7800 implementation */ 7801 if (bs > 1) { 7802 PetscCall(MatGetType(Amat, &jtype)); 7803 PetscCall(MatCreate(comm, &Gmat)); 7804 PetscCall(MatSetType(Gmat, jtype)); 7805 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7806 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7807 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7808 PetscInt *d_nnz, *o_nnz; 7809 MatScalar *aa, val, *AA; 7810 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7811 7812 if (isseqaij) { 7813 a = Amat; 7814 b = NULL; 7815 } else { 7816 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7817 a = d->A; 7818 b = d->B; 7819 } 7820 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7821 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7822 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7823 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7824 const PetscInt *cols1, *cols2; 7825 7826 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7827 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7828 nnz[brow / bs] = nc2 / bs; 7829 if (nc2 % bs) ok = 0; 7830 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7831 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7832 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7833 if (nc1 != nc2) ok = 0; 7834 else { 7835 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7836 if (cols1[jj] != cols2[jj]) ok = 0; 7837 if (cols1[jj] % bs != jj % bs) ok = 0; 7838 } 7839 } 7840 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7841 } 7842 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7843 if (!ok) { 7844 PetscCall(PetscFree2(d_nnz, o_nnz)); 7845 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7846 goto old_bs; 7847 } 7848 } 7849 } 7850 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7851 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7852 PetscCall(PetscFree2(d_nnz, o_nnz)); 7853 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7854 // diag 7855 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7856 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7857 7858 ai = aseq->i; 7859 n = ai[brow + 1] - ai[brow]; 7860 aj = aseq->j + ai[brow]; 7861 for (PetscInt k = 0; k < n; k += bs) { // block columns 7862 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7863 val = 0; 7864 if (index_size == 0) { 7865 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7866 aa = aseq->a + ai[brow + ii] + k; 7867 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7868 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7869 } 7870 } 7871 } else { // use (index,index) value if provided 7872 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7873 PetscInt ii = index[iii]; 7874 aa = aseq->a + ai[brow + ii] + k; 7875 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7876 PetscInt jj = index[jjj]; 7877 val += PetscAbs(PetscRealPart(aa[jj])); 7878 } 7879 } 7880 } 7881 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7882 AA[k / bs] = val; 7883 } 7884 grow = Istart / bs + brow / bs; 7885 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7886 } 7887 // off-diag 7888 if (ismpiaij) { 7889 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7890 const PetscScalar *vals; 7891 const PetscInt *cols, *garray = aij->garray; 7892 7893 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7894 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7895 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7896 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7897 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7898 AA[k / bs] = 0; 7899 AJ[cidx] = garray[cols[k]] / bs; 7900 } 7901 nc = ncols / bs; 7902 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7903 if (index_size == 0) { 7904 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7905 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7906 for (PetscInt k = 0; k < ncols; k += bs) { 7907 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7908 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7909 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + 
jj])); 7910 } 7911 } 7912 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7913 } 7914 } else { // use (index,index) value if provided 7915 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7916 PetscInt ii = index[iii]; 7917 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7918 for (PetscInt k = 0; k < ncols; k += bs) { 7919 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7920 PetscInt jj = index[jjj]; 7921 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7922 } 7923 } 7924 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7925 } 7926 } 7927 grow = Istart / bs + brow / bs; 7928 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7929 } 7930 } 7931 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7932 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7933 PetscCall(PetscFree2(AA, AJ)); 7934 } else { 7935 const PetscScalar *vals; 7936 const PetscInt *idx; 7937 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7938 old_bs: 7939 /* 7940 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7941 */ 7942 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7943 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7944 if (isseqaij) { 7945 PetscInt max_d_nnz; 7946 7947 /* 7948 Determine exact preallocation count for (sequential) scalar matrix 7949 */ 7950 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7951 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7952 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7953 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7954 PetscCall(PetscFree3(w0, w1, w2)); 7955 } else if (ismpiaij) { 7956 Mat Daij, Oaij; 7957 const PetscInt *garray; 7958 PetscInt max_d_nnz; 7959 7960 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7961 /* 7962 Determine exact preallocation count for diagonal block portion of scalar matrix 7963 */ 7964 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7965 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7966 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7967 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7968 PetscCall(PetscFree3(w0, w1, w2)); 7969 /* 7970 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7971 */ 7972 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7973 o_nnz[jj] = 0; 7974 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7975 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7976 o_nnz[jj] += ncols; 7977 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7978 } 7979 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7980 } 7981 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7982 /* get scalar copy (norms) of matrix */ 7983 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7984 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7985 PetscCall(PetscFree2(d_nnz, o_nnz)); 7986 for (Ii = Istart; Ii < Iend; Ii++) { 7987 PetscInt dest_row = Ii / bs; 7988 7989 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7990 for (jj = 0; jj < ncols; jj++) { 7991 PetscInt dest_col = idx[jj] / bs; 7992 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7993 7994 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, 
ADD_VALUES)); 7995 } 7996 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7997 } 7998 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7999 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8000 } 8001 } else { 8002 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8003 else { 8004 Gmat = Amat; 8005 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8006 } 8007 if (isseqaij) { 8008 a = Gmat; 8009 b = NULL; 8010 } else { 8011 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8012 a = d->A; 8013 b = d->B; 8014 } 8015 if (filter >= 0 || scale) { 8016 /* take absolute value of each entry */ 8017 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8018 MatInfo info; 8019 PetscScalar *avals; 8020 8021 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8022 PetscCall(MatSeqAIJGetArray(c, &avals)); 8023 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8024 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8025 } 8026 } 8027 } 8028 if (symmetrize) { 8029 PetscBool isset, issym; 8030 8031 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8032 if (!isset || !issym) { 8033 Mat matTrans; 8034 8035 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8036 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8037 PetscCall(MatDestroy(&matTrans)); 8038 } 8039 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8040 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8041 if (scale) { 8042 /* scale c for all diagonal values = 1 or -1 */ 8043 Vec diag; 8044 8045 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8046 PetscCall(MatGetDiagonal(Gmat, diag)); 8047 PetscCall(VecReciprocal(diag)); 8048 PetscCall(VecSqrtAbs(diag)); 8049 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8050 PetscCall(VecDestroy(&diag)); 8051 } 8052 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8053 if (filter >= 0) { 8054 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8055 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8056 } 8057 *a_Gmat = Gmat; 8058 PetscFunctionReturn(PETSC_SUCCESS); 8059 } 8060 8061 /* 8062 Special version for direct calls from Fortran 8063 */ 8064 8065 /* Change these macros so can be used in void function */ 8066 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8067 #undef PetscCall 8068 #define PetscCall(...) \ 8069 do { \ 8070 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8071 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8072 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8073 return; \ 8074 } \ 8075 } while (0) 8076 8077 #undef SETERRQ 8078 #define SETERRQ(comm, ierr, ...) 
\ 8079 do { \ 8080 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8081 return; \ 8082 } while (0) 8083 8084 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8085 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8086 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8087 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8088 #else 8089 #endif 8090 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8091 { 8092 Mat mat = *mmat; 8093 PetscInt m = *mm, n = *mn; 8094 InsertMode addv = *maddv; 8095 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8096 PetscScalar value; 8097 8098 MatCheckPreallocated(mat, 1); 8099 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8100 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8101 { 8102 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8103 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8104 PetscBool roworiented = aij->roworiented; 8105 8106 /* Some Variables required in the macro */ 8107 Mat A = aij->A; 8108 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8109 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8110 MatScalar *aa; 8111 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8112 Mat B = aij->B; 8113 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8114 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8115 MatScalar *ba; 8116 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8117 * cannot use "#if defined" inside a macro. 
*/ 8118 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8119 8120 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8121 PetscInt nonew = a->nonew; 8122 MatScalar *ap1, *ap2; 8123 8124 PetscFunctionBegin; 8125 PetscCall(MatSeqAIJGetArray(A, &aa)); 8126 PetscCall(MatSeqAIJGetArray(B, &ba)); 8127 for (i = 0; i < m; i++) { 8128 if (im[i] < 0) continue; 8129 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8130 if (im[i] >= rstart && im[i] < rend) { 8131 row = im[i] - rstart; 8132 lastcol1 = -1; 8133 rp1 = aj + ai[row]; 8134 ap1 = aa + ai[row]; 8135 rmax1 = aimax[row]; 8136 nrow1 = ailen[row]; 8137 low1 = 0; 8138 high1 = nrow1; 8139 lastcol2 = -1; 8140 rp2 = bj + bi[row]; 8141 ap2 = ba + bi[row]; 8142 rmax2 = bimax[row]; 8143 nrow2 = bilen[row]; 8144 low2 = 0; 8145 high2 = nrow2; 8146 8147 for (j = 0; j < n; j++) { 8148 if (roworiented) value = v[i * n + j]; 8149 else value = v[i + j * m]; 8150 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8151 if (in[j] >= cstart && in[j] < cend) { 8152 col = in[j] - cstart; 8153 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8154 } else if (in[j] < 0) continue; 8155 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8156 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8157 } else { 8158 if (mat->was_assembled) { 8159 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8160 #if defined(PETSC_USE_CTABLE) 8161 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8162 col--; 8163 #else 8164 col = aij->colmap[in[j]] - 1; 8165 #endif 8166 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8167 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8168 col = in[j]; 8169 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8170 B = aij->B; 8171 b = (Mat_SeqAIJ *)B->data; 8172 bimax = b->imax; 8173 bi = b->i; 8174 bilen = b->ilen; 8175 bj = b->j; 8176 rp2 = bj + bi[row]; 8177 ap2 = ba + bi[row]; 8178 rmax2 = bimax[row]; 8179 nrow2 = bilen[row]; 8180 low2 = 0; 8181 high2 = nrow2; 8182 bm = aij->B->rmap->n; 8183 ba = b->a; 8184 inserted = PETSC_FALSE; 8185 } 8186 } else col = in[j]; 8187 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8188 } 8189 } 8190 } else if (!aij->donotstash) { 8191 if (roworiented) { 8192 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8193 } else { 8194 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8195 } 8196 } 8197 } 8198 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8199 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8200 } 8201 PetscFunctionReturnVoid(); 8202 } 8203 8204 /* Undefining these here since they were redefined from their original definition above! No 8205 * other PETSc functions should be defined past this point, as it is impossible to recover the 8206 * original definitions */ 8207 #undef PetscCall 8208 #undef SETERRQ 8209