#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically switches over to use inodes when
   enough of them exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/
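/*
   Illustrative sketch (not part of this implementation file): following the recommendation in the
   MATAIJ manual page above, callers typically preallocate with both the Seq and MPI routines so the
   same code works on one process or many. The global size `n` and the per-row nonzero estimates
   5 (diagonal block) and 2 (off-diagonal block) are assumed example values, not PETSc requirements.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n, n));
     PetscCall(MatSetType(A, MATAIJ));
     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
*/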
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;
  PetscMPIInt        in;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  PetscCall(PetscMPIIntCast(n, &in));
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  process holds an order-N integer array), but access is fast.
*/
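/*
   Illustrative sketch (assumption: `aij` is a valid Mat_MPIAIJ whose colmap has been built by the
   routine below): this is the lookup pattern used elsewhere in this file to translate a global column
   id `gcol` of the off-diagonal block into a local column id `lcol`, with the +1/-1 shift encoding
   "not present" as a negative result.

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/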
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B     = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
  No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
*/
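/*
   Illustrative sketch of the expected input (assumed example values): two local rows whose global
   columns are {0, 5} and {1}, given in standard sorted CSR form,

     mat_i[] = {0, 2, 3};    row offsets into mat_j
     mat_j[] = {0, 5, 1};    global column ids, sorted within each row

   Columns falling in [cstart, cend) are copied into the diagonal block A (shifted by -cstart); all
   other columns go to the off-diagonal block B.
*/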
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
  No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These arrays can also account for non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all nonzero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layouts don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) {
    if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX;
    else header[3] = (PetscInt)hnz;
  }
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
mat->cmap->N : 0, 0, 1, &iscol)); 1336 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1338 /* The commented code uses MatCreateSubMatrices instead */ 1339 /* 1340 Mat *AA, A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1345 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1346 if (rank == 0) { 1347 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1348 A = AA[0]; 1349 Av = AA[0]; 1350 } 1351 PetscCall(MatDestroySubMatrices(1,&AA)); 1352 */ 1353 PetscCall(ISDestroy(&iscol)); 1354 PetscCall(ISDestroy(&isrow)); 1355 /* 1356 Everyone has to call to draw the matrix since the graphics waits are 1357 synchronized across all processors that share the PetscDraw object 1358 */ 1359 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1360 if (rank == 0) { 1361 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1362 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1363 } 1364 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1365 PetscCall(MatDestroy(&A)); 1366 } 1367 PetscFunctionReturn(PETSC_SUCCESS); 1368 } 1369 1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1371 { 1372 PetscBool iascii, isdraw, issocket, isbinary; 1373 1374 PetscFunctionBegin; 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1377 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1378 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1379 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1384 { 1385 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1386 Vec bb1 = NULL; 1387 PetscBool hasop; 1388 1389 PetscFunctionBegin; 1390 if (flag == SOR_APPLY_UPPER) { 1391 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1396 1397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1398 if (flag & SOR_ZERO_INITIAL_GUESS) { 1399 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1400 its--; 1401 } 1402 1403 while (its--) { 1404 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1405 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1406 1407 /* update rhs: bb1 = bb - B*x */ 1408 PetscCall(VecScale(mat->lvec, -1.0)); 1409 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1410 1411 /* local sweep */ 1412 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1413 } 1414 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1417 its--; 1418 } 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 1439 /* update rhs: bb1 = bb - B*x */ 1440 PetscCall(VecScale(mat->lvec, -1.0)); 1441 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1442 1443 /* local sweep */ 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1445 } 1446 } else if (flag & SOR_EISENSTAT) { 1447 Vec xx1; 1448 1449 PetscCall(VecDuplicate(bb, &xx1)); 1450 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1451 1452 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 if (!mat->diag) { 1455 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1456 PetscCall(MatGetDiagonal(matin, mat->diag)); 1457 } 1458 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1459 if (hasop) { 1460 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1461 } else { 1462 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1463 } 1464 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1465 1466 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1467 1468 /* local sweep */ 1469 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1470 PetscCall(VecAXPY(xx, 1.0, xx1)); 1471 PetscCall(VecDestroy(&xx1)); 1472 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1473 1474 PetscCall(VecDestroy(&bb1)); 1475 1476 matin->factorerrortype = mat->A->factorerrortype; 1477 PetscFunctionReturn(PETSC_SUCCESS); 1478 } 1479 1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1481 { 1482 Mat aA, aB, Aperm; 1483 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1484 PetscScalar *aa, *ba; 1485 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1486 PetscSF rowsf, sf; 1487 IS parcolp = NULL; 1488 PetscBool done; 1489 1490 PetscFunctionBegin; 1491 PetscCall(MatGetLocalSize(A, &m, &n)); 1492 PetscCall(ISGetIndices(rowp, &rwant)); 1493 PetscCall(ISGetIndices(colp, &cwant)); 1494 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1495 1496 /* Invert row permutation to find out where my rows should go */ 1497 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1498 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1499 PetscCall(PetscSFSetFromOptions(rowsf)); 1500 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1501 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1502 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1503 1504 /* Invert column permutation to find out where my columns should go */ 1505 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1506 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1507 PetscCall(PetscSFSetFromOptions(sf)); 1508 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1509 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1510 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1511 PetscCall(PetscSFDestroy(&sf)); 1512 1513 PetscCall(ISRestoreIndices(rowp, &rwant)); 1514 PetscCall(ISRestoreIndices(colp, &cwant)); 1515 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1516 1517 /* Find out where my gcols should go */ 1518 PetscCall(MatGetSize(aB, NULL, &ng)); 1519 PetscCall(PetscMalloc1(ng, &gcdest)); 1520 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1521 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1522 PetscCall(PetscSFSetFromOptions(sf)); 1523 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1524 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1525 PetscCall(PetscSFDestroy(&sf)); 1526 1527 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1528 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1529 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1530 for (i = 0; i < m; i++) { 1531 PetscInt row = rdest[i]; 1532 PetscMPIInt rowner; 1533 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1534 for (j = ai[i]; j < ai[i + 1]; j++) { 1535 PetscInt col = cdest[aj[j]]; 1536 PetscMPIInt cowner; 1537 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1538 if (rowner == cowner) dnnz[i]++; 1539 else onnz[i]++; 1540 } 1541 for (j = bi[i]; j < bi[i + 1]; j++) { 1542 PetscInt col = gcdest[bj[j]]; 1543 PetscMPIInt cowner; 1544 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1545 if (rowner == cowner) dnnz[i]++; 1546 else onnz[i]++; 1547 } 1548 } 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1551 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1552 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1553 PetscCall(PetscSFDestroy(&rowsf)); 1554 1555 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1556 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1557 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1558 for (i = 0; i < m; i++) { 1559 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1560 PetscInt j0, rowlen; 1561 rowlen = ai[i + 1] - ai[i]; 1562 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1563 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1564 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1565 } 1566 rowlen = bi[i + 1] - bi[i]; 1567 for (j0 = j = 0; j < rowlen; j0 = j) { 1568 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1569 
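      /* insert this batch of permuted off-diagonal (B-part) entries at their new global row and columns */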
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1570 } 1571 } 1572 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1573 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1574 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1575 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1576 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1577 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1578 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1579 PetscCall(PetscFree3(work, rdest, cdest)); 1580 PetscCall(PetscFree(gcdest)); 1581 if (parcolp) PetscCall(ISDestroy(&colp)); 1582 *B = Aperm; 1583 PetscFunctionReturn(PETSC_SUCCESS); 1584 } 1585 1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1587 { 1588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1589 1590 PetscFunctionBegin; 1591 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1592 if (ghosts) *ghosts = aij->garray; 1593 PetscFunctionReturn(PETSC_SUCCESS); 1594 } 1595 1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1597 { 1598 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1599 Mat A = mat->A, B = mat->B; 1600 PetscLogDouble isend[5], irecv[5]; 1601 1602 PetscFunctionBegin; 1603 info->block_size = 1.0; 1604 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1605 1606 isend[0] = info->nz_used; 1607 isend[1] = info->nz_allocated; 1608 isend[2] = info->nz_unneeded; 1609 isend[3] = info->memory; 1610 isend[4] = info->mallocs; 1611 1612 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1613 1614 isend[0] += info->nz_used; 1615 isend[1] += info->nz_allocated; 1616 isend[2] += info->nz_unneeded; 1617 isend[3] += info->memory; 1618 isend[4] += info->mallocs; 1619 if (flag == MAT_LOCAL) { 1620 info->nz_used = isend[0]; 1621 info->nz_allocated = isend[1]; 1622 info->nz_unneeded = isend[2]; 1623 info->memory = isend[3]; 1624 info->mallocs = isend[4]; 1625 } else if (flag == MAT_GLOBAL_MAX) { 1626 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1627 1628 info->nz_used = irecv[0]; 1629 info->nz_allocated = irecv[1]; 1630 info->nz_unneeded = irecv[2]; 1631 info->memory = irecv[3]; 1632 info->mallocs = irecv[4]; 1633 } else if (flag == MAT_GLOBAL_SUM) { 1634 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } 1642 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1643 info->fill_ratio_needed = 0; 1644 info->factor_mallocs = 0; 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1651 1652 PetscFunctionBegin; 1653 switch (op) { 1654 case MAT_NEW_NONZERO_LOCATIONS: 1655 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1656 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1657 case MAT_KEEP_NONZERO_PATTERN: 1658 case MAT_NEW_NONZERO_LOCATION_ERR: 1659 case MAT_USE_INODES: 1660 case MAT_IGNORE_ZERO_ENTRIES: 1661 case MAT_FORM_EXPLICIT_TRANSPOSE: 1662 MatCheckPreallocated(A, 1); 1663 PetscCall(MatSetOption(a->A, op, flg)); 1664 PetscCall(MatSetOption(a->B, op, flg)); 1665 break; 1666 case MAT_ROW_ORIENTED: 1667 MatCheckPreallocated(A, 1); 1668 a->roworiented = flg; 1669 1670 
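    /* forward the new orientation to both the diagonal (A) and off-diagonal (B) sequential blocks */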
PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_FORCE_DIAGONAL_ENTRIES: 1674 case MAT_SORTED_FULL: 1675 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1676 break; 1677 case MAT_IGNORE_OFF_PROC_ENTRIES: 1678 a->donotstash = flg; 1679 break; 1680 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1681 case MAT_SPD: 1682 case MAT_SYMMETRIC: 1683 case MAT_STRUCTURALLY_SYMMETRIC: 1684 case MAT_HERMITIAN: 1685 case MAT_SYMMETRY_ETERNAL: 1686 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1687 case MAT_SPD_ETERNAL: 1688 /* if the diagonal matrix is square it inherits some of the properties above */ 1689 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1690 break; 1691 case MAT_SUBMAT_SINGLEIS: 1692 A->submat_singleis = flg; 1693 break; 1694 case MAT_STRUCTURE_ONLY: 1695 /* The option is handled directly by MatSetOption() */ 1696 break; 1697 default: 1698 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1699 } 1700 PetscFunctionReturn(PETSC_SUCCESS); 1701 } 1702 1703 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1704 { 1705 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1706 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1707 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1708 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1709 PetscInt *cmap, *idx_p; 1710 1711 PetscFunctionBegin; 1712 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1713 mat->getrowactive = PETSC_TRUE; 1714 1715 if (!mat->rowvalues && (idx || v)) { 1716 /* 1717 allocate enough space to hold information from the longest row. 
1718 */ 1719 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1720 PetscInt max = 1, tmp; 1721 for (i = 0; i < matin->rmap->n; i++) { 1722 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1723 if (max < tmp) max = tmp; 1724 } 1725 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1726 } 1727 1728 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1729 lrow = row - rstart; 1730 1731 pvA = &vworkA; 1732 pcA = &cworkA; 1733 pvB = &vworkB; 1734 pcB = &cworkB; 1735 if (!v) { 1736 pvA = NULL; 1737 pvB = NULL; 1738 } 1739 if (!idx) { 1740 pcA = NULL; 1741 if (!v) pcB = NULL; 1742 } 1743 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1744 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1745 nztot = nzA + nzB; 1746 1747 cmap = mat->garray; 1748 if (v || idx) { 1749 if (nztot) { 1750 /* Sort by increasing column numbers, assuming A and B already sorted */ 1751 PetscInt imark = -1; 1752 if (v) { 1753 *v = v_p = mat->rowvalues; 1754 for (i = 0; i < nzB; i++) { 1755 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1756 else break; 1757 } 1758 imark = i; 1759 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1760 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1761 } 1762 if (idx) { 1763 *idx = idx_p = mat->rowindices; 1764 if (imark > -1) { 1765 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1766 } else { 1767 for (i = 0; i < nzB; i++) { 1768 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1769 else break; 1770 } 1771 imark = i; 1772 } 1773 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1774 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1775 } 1776 } else { 1777 if (idx) *idx = NULL; 1778 if (v) *v = NULL; 1779 } 1780 } 1781 *nz = nztot; 1782 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1783 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1784 PetscFunctionReturn(PETSC_SUCCESS); 1785 } 1786 1787 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1788 { 1789 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1790 1791 PetscFunctionBegin; 1792 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1793 aij->getrowactive = PETSC_FALSE; 1794 PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1801 PetscInt i, j, cstart = mat->cmap->rstart; 1802 PetscReal sum = 0.0; 1803 const MatScalar *v, *amata, *bmata; 1804 PetscMPIInt iN; 1805 1806 PetscFunctionBegin; 1807 if (aij->size == 1) { 1808 PetscCall(MatNorm(aij->A, type, norm)); 1809 } else { 1810 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1811 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1812 if (type == NORM_FROBENIUS) { 1813 v = amata; 1814 for (i = 0; i < amat->nz; i++) { 1815 sum += PetscRealPart(PetscConj(*v) * (*v)); 1816 v++; 1817 } 1818 v = bmata; 1819 for (i = 0; i < bmat->nz; i++) { 1820 sum += PetscRealPart(PetscConj(*v) * (*v)); 1821 v++; 1822 } 1823 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1824 *norm = PetscSqrtReal(*norm); 1825 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1826 } else if 
(type == NORM_1) { /* max column norm */ 1827 PetscReal *tmp, *tmp2; 1828 PetscInt *jj, *garray = aij->garray; 1829 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1830 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1831 *norm = 0.0; 1832 v = amata; 1833 jj = amat->j; 1834 for (j = 0; j < amat->nz; j++) { 1835 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1836 v++; 1837 } 1838 v = bmata; 1839 jj = bmat->j; 1840 for (j = 0; j < bmat->nz; j++) { 1841 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1842 v++; 1843 } 1844 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 1845 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1846 for (j = 0; j < mat->cmap->N; j++) { 1847 if (tmp2[j] > *norm) *norm = tmp2[j]; 1848 } 1849 PetscCall(PetscFree(tmp)); 1850 PetscCall(PetscFree(tmp2)); 1851 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1852 } else if (type == NORM_INFINITY) { /* max row norm */ 1853 PetscReal ntemp = 0.0; 1854 for (j = 0; j < aij->A->rmap->n; j++) { 1855 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1856 sum = 0.0; 1857 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1858 sum += PetscAbsScalar(*v); 1859 v++; 1860 } 1861 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1862 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1863 sum += PetscAbsScalar(*v); 1864 v++; 1865 } 1866 if (sum > ntemp) ntemp = sum; 1867 } 1868 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1869 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1870 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1871 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1872 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1873 } 1874 PetscFunctionReturn(PETSC_SUCCESS); 1875 } 1876 1877 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1878 { 1879 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1880 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1881 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1882 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1883 Mat B, A_diag, *B_diag; 1884 const MatScalar *pbv, *bv; 1885 1886 PetscFunctionBegin; 1887 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1888 ma = A->rmap->n; 1889 na = A->cmap->n; 1890 mb = a->B->rmap->n; 1891 nb = a->B->cmap->n; 1892 ai = Aloc->i; 1893 aj = Aloc->j; 1894 bi = Bloc->i; 1895 bj = Bloc->j; 1896 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1897 PetscInt *d_nnz, *g_nnz, *o_nnz; 1898 PetscSFNode *oloc; 1899 PETSC_UNUSED PetscSF sf; 1900 1901 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1902 /* compute d_nnz for preallocation */ 1903 PetscCall(PetscArrayzero(d_nnz, na)); 1904 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1905 /* compute local off-diagonal contributions */ 1906 PetscCall(PetscArrayzero(g_nnz, nb)); 1907 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1908 /* map those to global */ 1909 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1910 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1911 PetscCall(PetscSFSetFromOptions(sf)); 1912 PetscCall(PetscArrayzero(o_nnz, na)); 1913 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1914 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, 
MPI_SUM)); 1915 PetscCall(PetscSFDestroy(&sf)); 1916 1917 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1918 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1919 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1920 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1921 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1922 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1923 } else { 1924 B = *matout; 1925 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1926 } 1927 1928 b = (Mat_MPIAIJ *)B->data; 1929 A_diag = a->A; 1930 B_diag = &b->A; 1931 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1932 A_diag_ncol = A_diag->cmap->N; 1933 B_diag_ilen = sub_B_diag->ilen; 1934 B_diag_i = sub_B_diag->i; 1935 1936 /* Set ilen for diagonal of B */ 1937 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1938 1939 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1940 very quickly (=without using MatSetValues), because all writes are local. */ 1941 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1942 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1943 1944 /* copy over the B part */ 1945 PetscCall(PetscMalloc1(bi[mb], &cols)); 1946 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1947 pbv = bv; 1948 row = A->rmap->rstart; 1949 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1950 cols_tmp = cols; 1951 for (i = 0; i < mb; i++) { 1952 ncol = bi[i + 1] - bi[i]; 1953 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1954 row++; 1955 if (pbv) pbv += ncol; 1956 if (cols_tmp) cols_tmp += ncol; 1957 } 1958 PetscCall(PetscFree(cols)); 1959 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1960 1961 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1962 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1963 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1964 *matout = B; 1965 } else { 1966 PetscCall(MatHeaderMerge(A, &B)); 1967 } 1968 PetscFunctionReturn(PETSC_SUCCESS); 1969 } 1970 1971 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1972 { 1973 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1974 Mat a = aij->A, b = aij->B; 1975 PetscInt s1, s2, s3; 1976 1977 PetscFunctionBegin; 1978 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1979 if (rr) { 1980 PetscCall(VecGetLocalSize(rr, &s1)); 1981 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1982 /* Overlap communication with computation. 
*/ 1983 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1984 } 1985 if (ll) { 1986 PetscCall(VecGetLocalSize(ll, &s1)); 1987 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1988 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1989 } 1990 /* scale the diagonal block */ 1991 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1992 1993 if (rr) { 1994 /* Do a scatter end and then right scale the off-diagonal block */ 1995 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1996 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1997 } 1998 PetscFunctionReturn(PETSC_SUCCESS); 1999 } 2000 2001 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2002 { 2003 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2004 2005 PetscFunctionBegin; 2006 PetscCall(MatSetUnfactored(a->A)); 2007 PetscFunctionReturn(PETSC_SUCCESS); 2008 } 2009 2010 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2011 { 2012 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2013 Mat a, b, c, d; 2014 PetscBool flg; 2015 2016 PetscFunctionBegin; 2017 a = matA->A; 2018 b = matA->B; 2019 c = matB->A; 2020 d = matB->B; 2021 2022 PetscCall(MatEqual(a, c, &flg)); 2023 if (flg) PetscCall(MatEqual(b, d, &flg)); 2024 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2025 PetscFunctionReturn(PETSC_SUCCESS); 2026 } 2027 2028 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2029 { 2030 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2031 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2032 2033 PetscFunctionBegin; 2034 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2035 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2036 /* because of the column compression in the off-processor part of the matrix a->B, 2037 the number of columns in a->B and b->B may be different, hence we cannot call 2038 the MatCopy() directly on the two parts. If need be, we can provide a more 2039 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2040 then copying the submatrices */ 2041 PetscCall(MatCopy_Basic(A, B, str)); 2042 } else { 2043 PetscCall(MatCopy(a->A, b->A, str)); 2044 PetscCall(MatCopy(a->B, b->B, str)); 2045 } 2046 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2047 PetscFunctionReturn(PETSC_SUCCESS); 2048 } 2049 2050 /* 2051 Computes the number of nonzeros per row needed for preallocation when X and Y 2052 have different nonzero structure. 
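   For example (a hypothetical pair of rows): if row i of X has global columns {0, 3, 7} and row i of Y
   has global columns {3, 5}, the merged pattern is {0, 3, 5, 7}, so nnz[i] = 4; the shared column 3 is
   counted only once.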
2053 */ 2054 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2055 { 2056 PetscInt i, j, k, nzx, nzy; 2057 2058 PetscFunctionBegin; 2059 /* Set the number of nonzeros in the new matrix */ 2060 for (i = 0; i < m; i++) { 2061 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2062 nzx = xi[i + 1] - xi[i]; 2063 nzy = yi[i + 1] - yi[i]; 2064 nnz[i] = 0; 2065 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2066 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2067 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2068 nnz[i]++; 2069 } 2070 for (; k < nzy; k++) nnz[i]++; 2071 } 2072 PetscFunctionReturn(PETSC_SUCCESS); 2073 } 2074 2075 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2076 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2077 { 2078 PetscInt m = Y->rmap->N; 2079 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2080 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2081 2082 PetscFunctionBegin; 2083 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2084 PetscFunctionReturn(PETSC_SUCCESS); 2085 } 2086 2087 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2088 { 2089 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2090 2091 PetscFunctionBegin; 2092 if (str == SAME_NONZERO_PATTERN) { 2093 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2094 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2095 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2096 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2097 } else { 2098 Mat B; 2099 PetscInt *nnz_d, *nnz_o; 2100 2101 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2102 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2103 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2104 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2105 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2106 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2107 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2108 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2109 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2110 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2111 PetscCall(MatHeaderMerge(Y, &B)); 2112 PetscCall(PetscFree(nnz_d)); 2113 PetscCall(PetscFree(nnz_o)); 2114 } 2115 PetscFunctionReturn(PETSC_SUCCESS); 2116 } 2117 2118 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2119 2120 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2121 { 2122 PetscFunctionBegin; 2123 if (PetscDefined(USE_COMPLEX)) { 2124 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2125 2126 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2127 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2128 } 2129 PetscFunctionReturn(PETSC_SUCCESS); 2130 } 2131 2132 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2133 { 2134 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2135 2136 PetscFunctionBegin; 2137 PetscCall(MatRealPart(a->A)); 2138 PetscCall(MatRealPart(a->B)); 2139 PetscFunctionReturn(PETSC_SUCCESS); 2140 } 2141 2142 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2143 { 2144 Mat_MPIAIJ *a = (Mat_MPIAIJ 
*)A->data; 2145 2146 PetscFunctionBegin; 2147 PetscCall(MatImaginaryPart(a->A)); 2148 PetscCall(MatImaginaryPart(a->B)); 2149 PetscFunctionReturn(PETSC_SUCCESS); 2150 } 2151 2152 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2153 { 2154 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2155 PetscInt i, *idxb = NULL, m = A->rmap->n; 2156 PetscScalar *vv; 2157 Vec vB, vA; 2158 const PetscScalar *va, *vb; 2159 2160 PetscFunctionBegin; 2161 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2162 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2163 2164 PetscCall(VecGetArrayRead(vA, &va)); 2165 if (idx) { 2166 for (i = 0; i < m; i++) { 2167 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2168 } 2169 } 2170 2171 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2172 PetscCall(PetscMalloc1(m, &idxb)); 2173 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2174 2175 PetscCall(VecGetArrayWrite(v, &vv)); 2176 PetscCall(VecGetArrayRead(vB, &vb)); 2177 for (i = 0; i < m; i++) { 2178 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2179 vv[i] = vb[i]; 2180 if (idx) idx[i] = a->garray[idxb[i]]; 2181 } else { 2182 vv[i] = va[i]; 2183 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2184 } 2185 } 2186 PetscCall(VecRestoreArrayWrite(v, &vv)); 2187 PetscCall(VecRestoreArrayRead(vA, &va)); 2188 PetscCall(VecRestoreArrayRead(vB, &vb)); 2189 PetscCall(PetscFree(idxb)); 2190 PetscCall(VecDestroy(&vA)); 2191 PetscCall(VecDestroy(&vB)); 2192 PetscFunctionReturn(PETSC_SUCCESS); 2193 } 2194 2195 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2196 { 2197 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2198 Vec vB, vA; 2199 2200 PetscFunctionBegin; 2201 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2202 PetscCall(MatGetRowSumAbs(a->A, vA)); 2203 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2204 PetscCall(MatGetRowSumAbs(a->B, vB)); 2205 PetscCall(VecAXPY(vA, 1.0, vB)); 2206 PetscCall(VecDestroy(&vB)); 2207 PetscCall(VecCopy(vA, v)); 2208 PetscCall(VecDestroy(&vA)); 2209 PetscFunctionReturn(PETSC_SUCCESS); 2210 } 2211 2212 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2213 { 2214 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2215 PetscInt m = A->rmap->n, n = A->cmap->n; 2216 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2217 PetscInt *cmap = mat->garray; 2218 PetscInt *diagIdx, *offdiagIdx; 2219 Vec diagV, offdiagV; 2220 PetscScalar *a, *diagA, *offdiagA; 2221 const PetscScalar *ba, *bav; 2222 PetscInt r, j, col, ncols, *bi, *bj; 2223 Mat B = mat->B; 2224 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2225 2226 PetscFunctionBegin; 2227 /* When a process holds entire A and other processes have no entry */ 2228 if (A->cmap->N == n) { 2229 PetscCall(VecGetArrayWrite(v, &diagA)); 2230 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2231 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2232 PetscCall(VecDestroy(&diagV)); 2233 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2234 PetscFunctionReturn(PETSC_SUCCESS); 2235 } else if (n == 0) { 2236 if (m) { 2237 PetscCall(VecGetArrayWrite(v, &a)); 2238 for (r = 0; r < m; r++) { 2239 a[r] = 0.0; 2240 if (idx) idx[r] = -1; 2241 } 2242 PetscCall(VecRestoreArrayWrite(v, &a)); 2243 } 2244 PetscFunctionReturn(PETSC_SUCCESS); 2245 } 2246 2247 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2249 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2250 PetscCall(MatGetRowMinAbs(mat->A, 
diagV, diagIdx)); 2251 2252 /* Get offdiagIdx[] for implicit 0.0 */ 2253 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2254 ba = bav; 2255 bi = b->i; 2256 bj = b->j; 2257 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2258 for (r = 0; r < m; r++) { 2259 ncols = bi[r + 1] - bi[r]; 2260 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2261 offdiagA[r] = *ba; 2262 offdiagIdx[r] = cmap[0]; 2263 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2264 offdiagA[r] = 0.0; 2265 2266 /* Find first hole in the cmap */ 2267 for (j = 0; j < ncols; j++) { 2268 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2269 if (col > j && j < cstart) { 2270 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2271 break; 2272 } else if (col > j + n && j >= cstart) { 2273 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2274 break; 2275 } 2276 } 2277 if (j == ncols && ncols < A->cmap->N - n) { 2278 /* a hole is outside compressed Bcols */ 2279 if (ncols == 0) { 2280 if (cstart) { 2281 offdiagIdx[r] = 0; 2282 } else offdiagIdx[r] = cend; 2283 } else { /* ncols > 0 */ 2284 offdiagIdx[r] = cmap[ncols - 1] + 1; 2285 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2286 } 2287 } 2288 } 2289 2290 for (j = 0; j < ncols; j++) { 2291 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2292 offdiagA[r] = *ba; 2293 offdiagIdx[r] = cmap[*bj]; 2294 } 2295 ba++; 2296 bj++; 2297 } 2298 } 2299 2300 PetscCall(VecGetArrayWrite(v, &a)); 2301 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2302 for (r = 0; r < m; ++r) { 2303 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2304 a[r] = diagA[r]; 2305 if (idx) idx[r] = cstart + diagIdx[r]; 2306 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2307 a[r] = diagA[r]; 2308 if (idx) { 2309 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2310 idx[r] = cstart + diagIdx[r]; 2311 } else idx[r] = offdiagIdx[r]; 2312 } 2313 } else { 2314 a[r] = offdiagA[r]; 2315 if (idx) idx[r] = offdiagIdx[r]; 2316 } 2317 } 2318 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2319 PetscCall(VecRestoreArrayWrite(v, &a)); 2320 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2321 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2322 PetscCall(VecDestroy(&diagV)); 2323 PetscCall(VecDestroy(&offdiagV)); 2324 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2325 PetscFunctionReturn(PETSC_SUCCESS); 2326 } 2327 2328 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2329 { 2330 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2331 PetscInt m = A->rmap->n, n = A->cmap->n; 2332 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2333 PetscInt *cmap = mat->garray; 2334 PetscInt *diagIdx, *offdiagIdx; 2335 Vec diagV, offdiagV; 2336 PetscScalar *a, *diagA, *offdiagA; 2337 const PetscScalar *ba, *bav; 2338 PetscInt r, j, col, ncols, *bi, *bj; 2339 Mat B = mat->B; 2340 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2341 2342 PetscFunctionBegin; 2343 /* When a process holds entire A and other processes have no entry */ 2344 if (A->cmap->N == n) { 2345 PetscCall(VecGetArrayWrite(v, &diagA)); 2346 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2347 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2348 PetscCall(VecDestroy(&diagV)); 2349 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2350 PetscFunctionReturn(PETSC_SUCCESS); 2351 } else if (n == 0) { 2352 if (m) { 2353 PetscCall(VecGetArrayWrite(v, &a)); 2354 for (r = 0; r < m; r++) { 2355 a[r] = 
PETSC_MAX_REAL; 2356 if (idx) idx[r] = -1; 2357 } 2358 PetscCall(VecRestoreArrayWrite(v, &a)); 2359 } 2360 PetscFunctionReturn(PETSC_SUCCESS); 2361 } 2362 2363 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2364 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2365 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2366 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2367 2368 /* Get offdiagIdx[] for implicit 0.0 */ 2369 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2370 ba = bav; 2371 bi = b->i; 2372 bj = b->j; 2373 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2374 for (r = 0; r < m; r++) { 2375 ncols = bi[r + 1] - bi[r]; 2376 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2377 offdiagA[r] = *ba; 2378 offdiagIdx[r] = cmap[0]; 2379 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2380 offdiagA[r] = 0.0; 2381 2382 /* Find first hole in the cmap */ 2383 for (j = 0; j < ncols; j++) { 2384 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2385 if (col > j && j < cstart) { 2386 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2387 break; 2388 } else if (col > j + n && j >= cstart) { 2389 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2390 break; 2391 } 2392 } 2393 if (j == ncols && ncols < A->cmap->N - n) { 2394 /* a hole is outside compressed Bcols */ 2395 if (ncols == 0) { 2396 if (cstart) { 2397 offdiagIdx[r] = 0; 2398 } else offdiagIdx[r] = cend; 2399 } else { /* ncols > 0 */ 2400 offdiagIdx[r] = cmap[ncols - 1] + 1; 2401 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2402 } 2403 } 2404 } 2405 2406 for (j = 0; j < ncols; j++) { 2407 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2408 offdiagA[r] = *ba; 2409 offdiagIdx[r] = cmap[*bj]; 2410 } 2411 ba++; 2412 bj++; 2413 } 2414 } 2415 2416 PetscCall(VecGetArrayWrite(v, &a)); 2417 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2418 for (r = 0; r < m; ++r) { 2419 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2420 a[r] = diagA[r]; 2421 if (idx) idx[r] = cstart + diagIdx[r]; 2422 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2423 a[r] = diagA[r]; 2424 if (idx) { 2425 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2426 idx[r] = cstart + diagIdx[r]; 2427 } else idx[r] = offdiagIdx[r]; 2428 } 2429 } else { 2430 a[r] = offdiagA[r]; 2431 if (idx) idx[r] = offdiagIdx[r]; 2432 } 2433 } 2434 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2435 PetscCall(VecRestoreArrayWrite(v, &a)); 2436 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2437 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2438 PetscCall(VecDestroy(&diagV)); 2439 PetscCall(VecDestroy(&offdiagV)); 2440 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2441 PetscFunctionReturn(PETSC_SUCCESS); 2442 } 2443 2444 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2445 { 2446 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2447 PetscInt m = A->rmap->n, n = A->cmap->n; 2448 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2449 PetscInt *cmap = mat->garray; 2450 PetscInt *diagIdx, *offdiagIdx; 2451 Vec diagV, offdiagV; 2452 PetscScalar *a, *diagA, *offdiagA; 2453 const PetscScalar *ba, *bav; 2454 PetscInt r, j, col, ncols, *bi, *bj; 2455 Mat B = mat->B; 2456 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2457 2458 PetscFunctionBegin; 2459 /* When a process holds entire A and other processes have no entry */ 2460 if (A->cmap->N == n) { 2461 PetscCall(VecGetArrayWrite(v, &diagA)); 2462 
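    /* wrap the caller's output array in a sequential Vec so MatGetRowMax() on the diagonal block can fill it in place */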
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2463 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2464 PetscCall(VecDestroy(&diagV)); 2465 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2466 PetscFunctionReturn(PETSC_SUCCESS); 2467 } else if (n == 0) { 2468 if (m) { 2469 PetscCall(VecGetArrayWrite(v, &a)); 2470 for (r = 0; r < m; r++) { 2471 a[r] = PETSC_MIN_REAL; 2472 if (idx) idx[r] = -1; 2473 } 2474 PetscCall(VecRestoreArrayWrite(v, &a)); 2475 } 2476 PetscFunctionReturn(PETSC_SUCCESS); 2477 } 2478 2479 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2480 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2481 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2482 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2483 2484 /* Get offdiagIdx[] for implicit 0.0 */ 2485 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2486 ba = bav; 2487 bi = b->i; 2488 bj = b->j; 2489 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2490 for (r = 0; r < m; r++) { 2491 ncols = bi[r + 1] - bi[r]; 2492 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2493 offdiagA[r] = *ba; 2494 offdiagIdx[r] = cmap[0]; 2495 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2496 offdiagA[r] = 0.0; 2497 2498 /* Find first hole in the cmap */ 2499 for (j = 0; j < ncols; j++) { 2500 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2501 if (col > j && j < cstart) { 2502 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2503 break; 2504 } else if (col > j + n && j >= cstart) { 2505 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2506 break; 2507 } 2508 } 2509 if (j == ncols && ncols < A->cmap->N - n) { 2510 /* a hole is outside compressed Bcols */ 2511 if (ncols == 0) { 2512 if (cstart) { 2513 offdiagIdx[r] = 0; 2514 } else offdiagIdx[r] = cend; 2515 } else { /* ncols > 0 */ 2516 offdiagIdx[r] = cmap[ncols - 1] + 1; 2517 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2518 } 2519 } 2520 } 2521 2522 for (j = 0; j < ncols; j++) { 2523 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2524 offdiagA[r] = *ba; 2525 offdiagIdx[r] = cmap[*bj]; 2526 } 2527 ba++; 2528 bj++; 2529 } 2530 } 2531 2532 PetscCall(VecGetArrayWrite(v, &a)); 2533 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2534 for (r = 0; r < m; ++r) { 2535 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2536 a[r] = diagA[r]; 2537 if (idx) idx[r] = cstart + diagIdx[r]; 2538 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2539 a[r] = diagA[r]; 2540 if (idx) { 2541 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2542 idx[r] = cstart + diagIdx[r]; 2543 } else idx[r] = offdiagIdx[r]; 2544 } 2545 } else { 2546 a[r] = offdiagA[r]; 2547 if (idx) idx[r] = offdiagIdx[r]; 2548 } 2549 } 2550 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2551 PetscCall(VecRestoreArrayWrite(v, &a)); 2552 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2553 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2554 PetscCall(VecDestroy(&diagV)); 2555 PetscCall(VecDestroy(&offdiagV)); 2556 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2557 PetscFunctionReturn(PETSC_SUCCESS); 2558 } 2559 2560 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2561 { 2562 Mat *dummy; 2563 2564 PetscFunctionBegin; 2565 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2566 *newmat = *dummy; 2567 PetscCall(PetscFree(dummy)); 2568 PetscFunctionReturn(PETSC_SUCCESS); 2569 } 2570 
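/*
  The MatGetRowMax/Min/MaxAbs/MinAbs implementations above combine the result from the diagonal block (A)
  with the result from the off-diagonal block (B), mapping compressed B column numbers back to global
  indices through garray. A minimal usage sketch through the public interface (user code, not part of
  this file; the assembled parallel matrix A is assumed to be supplied by the caller):

    Vec       rmax;
    PetscInt *rowidx;
    PetscInt  mlocal;

    PetscCall(MatGetLocalSize(A, &mlocal, NULL));
    PetscCall(MatCreateVecs(A, NULL, &rmax));    // left vector: one entry per local row
    PetscCall(PetscMalloc1(mlocal, &rowidx));
    PetscCall(MatGetRowMaxAbs(A, rmax, rowidx)); // rowidx[i] = global column index of the row maximum
    PetscCall(PetscFree(rowidx));
    PetscCall(VecDestroy(&rmax));
*/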
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the portion of the matrix stored on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in `MatIncreaseOverlap()`

  Collective

  Input Parameters:
+ A  - the matrix
- sc - `PETSC_TRUE` to use the scalable algorithm; the default is `PETSC_FALSE` (the non-scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
PetscFunctionReturn(PETSC_SUCCESS); 2665 } 2666 2667 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2668 { 2669 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2670 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2671 2672 PetscFunctionBegin; 2673 if (!Y->preallocated) { 2674 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2675 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2676 PetscInt nonew = aij->nonew; 2677 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2678 aij->nonew = nonew; 2679 } 2680 PetscCall(MatShift_Basic(Y, a)); 2681 PetscFunctionReturn(PETSC_SUCCESS); 2682 } 2683 2684 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2685 { 2686 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2687 2688 PetscFunctionBegin; 2689 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2690 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2691 if (d) { 2692 PetscInt rstart; 2693 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2694 *d += rstart; 2695 } 2696 PetscFunctionReturn(PETSC_SUCCESS); 2697 } 2698 2699 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2700 { 2701 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2702 2703 PetscFunctionBegin; 2704 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2705 PetscFunctionReturn(PETSC_SUCCESS); 2706 } 2707 2708 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2709 { 2710 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2711 2712 PetscFunctionBegin; 2713 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2714 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2715 PetscFunctionReturn(PETSC_SUCCESS); 2716 } 2717 2718 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2719 MatGetRow_MPIAIJ, 2720 MatRestoreRow_MPIAIJ, 2721 MatMult_MPIAIJ, 2722 /* 4*/ MatMultAdd_MPIAIJ, 2723 MatMultTranspose_MPIAIJ, 2724 MatMultTransposeAdd_MPIAIJ, 2725 NULL, 2726 NULL, 2727 NULL, 2728 /*10*/ NULL, 2729 NULL, 2730 NULL, 2731 MatSOR_MPIAIJ, 2732 MatTranspose_MPIAIJ, 2733 /*15*/ MatGetInfo_MPIAIJ, 2734 MatEqual_MPIAIJ, 2735 MatGetDiagonal_MPIAIJ, 2736 MatDiagonalScale_MPIAIJ, 2737 MatNorm_MPIAIJ, 2738 /*20*/ MatAssemblyBegin_MPIAIJ, 2739 MatAssemblyEnd_MPIAIJ, 2740 MatSetOption_MPIAIJ, 2741 MatZeroEntries_MPIAIJ, 2742 /*24*/ MatZeroRows_MPIAIJ, 2743 NULL, 2744 NULL, 2745 NULL, 2746 NULL, 2747 /*29*/ MatSetUp_MPI_Hash, 2748 NULL, 2749 NULL, 2750 MatGetDiagonalBlock_MPIAIJ, 2751 NULL, 2752 /*34*/ MatDuplicate_MPIAIJ, 2753 NULL, 2754 NULL, 2755 NULL, 2756 NULL, 2757 /*39*/ MatAXPY_MPIAIJ, 2758 MatCreateSubMatrices_MPIAIJ, 2759 MatIncreaseOverlap_MPIAIJ, 2760 MatGetValues_MPIAIJ, 2761 MatCopy_MPIAIJ, 2762 /*44*/ MatGetRowMax_MPIAIJ, 2763 MatScale_MPIAIJ, 2764 MatShift_MPIAIJ, 2765 MatDiagonalSet_MPIAIJ, 2766 MatZeroRowsColumns_MPIAIJ, 2767 /*49*/ MatSetRandom_MPIAIJ, 2768 MatGetRowIJ_MPIAIJ, 2769 MatRestoreRowIJ_MPIAIJ, 2770 NULL, 2771 NULL, 2772 /*54*/ MatFDColoringCreate_MPIXAIJ, 2773 NULL, 2774 MatSetUnfactored_MPIAIJ, 2775 MatPermute_MPIAIJ, 2776 NULL, 2777 /*59*/ MatCreateSubMatrix_MPIAIJ, 2778 MatDestroy_MPIAIJ, 2779 MatView_MPIAIJ, 2780 NULL, 2781 NULL, 2782 /*64*/ NULL, 2783 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2784 NULL, 2785 NULL, 2786 NULL, 2787 /*69*/ 
MatGetRowMaxAbs_MPIAIJ, 2788 MatGetRowMinAbs_MPIAIJ, 2789 NULL, 2790 NULL, 2791 NULL, 2792 NULL, 2793 /*75*/ MatFDColoringApply_AIJ, 2794 MatSetFromOptions_MPIAIJ, 2795 NULL, 2796 NULL, 2797 MatFindZeroDiagonals_MPIAIJ, 2798 /*80*/ NULL, 2799 NULL, 2800 NULL, 2801 /*83*/ MatLoad_MPIAIJ, 2802 NULL, 2803 NULL, 2804 NULL, 2805 NULL, 2806 NULL, 2807 /*89*/ NULL, 2808 NULL, 2809 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2810 NULL, 2811 NULL, 2812 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2813 NULL, 2814 NULL, 2815 NULL, 2816 MatBindToCPU_MPIAIJ, 2817 /*99*/ MatProductSetFromOptions_MPIAIJ, 2818 NULL, 2819 NULL, 2820 MatConjugate_MPIAIJ, 2821 NULL, 2822 /*104*/ MatSetValuesRow_MPIAIJ, 2823 MatRealPart_MPIAIJ, 2824 MatImaginaryPart_MPIAIJ, 2825 NULL, 2826 NULL, 2827 /*109*/ NULL, 2828 NULL, 2829 MatGetRowMin_MPIAIJ, 2830 NULL, 2831 MatMissingDiagonal_MPIAIJ, 2832 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2833 NULL, 2834 MatGetGhosts_MPIAIJ, 2835 NULL, 2836 NULL, 2837 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2838 NULL, 2839 NULL, 2840 NULL, 2841 MatGetMultiProcBlock_MPIAIJ, 2842 /*124*/ MatFindNonzeroRows_MPIAIJ, 2843 MatGetColumnReductions_MPIAIJ, 2844 MatInvertBlockDiagonal_MPIAIJ, 2845 MatInvertVariableBlockDiagonal_MPIAIJ, 2846 MatCreateSubMatricesMPI_MPIAIJ, 2847 /*129*/ NULL, 2848 NULL, 2849 NULL, 2850 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2851 NULL, 2852 /*134*/ NULL, 2853 NULL, 2854 NULL, 2855 NULL, 2856 NULL, 2857 /*139*/ MatSetBlockSizes_MPIAIJ, 2858 NULL, 2859 NULL, 2860 MatFDColoringSetUp_MPIXAIJ, 2861 MatFindOffBlockDiagonalEntries_MPIAIJ, 2862 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2863 /*145*/ NULL, 2864 NULL, 2865 NULL, 2866 MatCreateGraph_Simple_AIJ, 2867 NULL, 2868 /*150*/ NULL, 2869 MatEliminateZeros_MPIAIJ, 2870 MatGetRowSumAbs_MPIAIJ, 2871 NULL, 2872 NULL, 2873 NULL}; 2874 2875 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2876 { 2877 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2878 2879 PetscFunctionBegin; 2880 PetscCall(MatStoreValues(aij->A)); 2881 PetscCall(MatStoreValues(aij->B)); 2882 PetscFunctionReturn(PETSC_SUCCESS); 2883 } 2884 2885 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2886 { 2887 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2888 2889 PetscFunctionBegin; 2890 PetscCall(MatRetrieveValues(aij->A)); 2891 PetscCall(MatRetrieveValues(aij->B)); 2892 PetscFunctionReturn(PETSC_SUCCESS); 2893 } 2894 2895 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2896 { 2897 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2898 PetscMPIInt size; 2899 2900 PetscFunctionBegin; 2901 if (B->hash_active) { 2902 B->ops[0] = b->cops; 2903 B->hash_active = PETSC_FALSE; 2904 } 2905 PetscCall(PetscLayoutSetUp(B->rmap)); 2906 PetscCall(PetscLayoutSetUp(B->cmap)); 2907 2908 #if defined(PETSC_USE_CTABLE) 2909 PetscCall(PetscHMapIDestroy(&b->colmap)); 2910 #else 2911 PetscCall(PetscFree(b->colmap)); 2912 #endif 2913 PetscCall(PetscFree(b->garray)); 2914 PetscCall(VecDestroy(&b->lvec)); 2915 PetscCall(VecScatterDestroy(&b->Mvctx)); 2916 2917 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2918 2919 MatSeqXAIJGetOptions_Private(b->B); 2920 PetscCall(MatDestroy(&b->B)); 2921 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2922 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2923 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2924 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2925 MatSeqXAIJRestoreOptions_Private(b->B); 2926 2927 MatSeqXAIJGetOptions_Private(b->A); 2928 PetscCall(MatDestroy(&b->A)); 2929 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2930 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2931 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2932 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2933 MatSeqXAIJRestoreOptions_Private(b->A); 2934 2935 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2936 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2937 B->preallocated = PETSC_TRUE; 2938 B->was_assembled = PETSC_FALSE; 2939 B->assembled = PETSC_FALSE; 2940 PetscFunctionReturn(PETSC_SUCCESS); 2941 } 2942 2943 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2944 { 2945 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2946 PetscBool ondiagreset, offdiagreset, memoryreset; 2947 2948 PetscFunctionBegin; 2949 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2950 PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()"); 2951 if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS); 2952 2953 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset)); 2954 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset)); 2955 memoryreset = (PetscBool)(ondiagreset || offdiagreset); 2956 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B))); 2957 if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS); 2958 2959 PetscCall(PetscLayoutSetUp(B->rmap)); 2960 PetscCall(PetscLayoutSetUp(B->cmap)); 2961 PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled"); 2962 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2963 PetscCall(VecScatterDestroy(&b->Mvctx)); 2964 2965 B->preallocated = PETSC_TRUE; 2966 B->was_assembled = PETSC_FALSE; 2967 B->assembled = PETSC_FALSE; 2968 /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */ 2969 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2970 PetscFunctionReturn(PETSC_SUCCESS); 2971 } 2972 2973 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2974 { 2975 Mat mat; 2976 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2977 2978 PetscFunctionBegin; 2979 *newmat = NULL; 2980 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2981 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2982 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2983 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2984 a = (Mat_MPIAIJ *)mat->data; 2985 2986 mat->factortype = matin->factortype; 2987 mat->assembled = matin->assembled; 2988 mat->insertmode = NOT_SET_VALUES; 2989 2990 a->size = oldmat->size; 2991 a->rank = oldmat->rank; 2992 a->donotstash = oldmat->donotstash; 2993 a->roworiented = oldmat->roworiented; 2994 a->rowindices = NULL; 2995 a->rowvalues = NULL; 2996 a->getrowactive = PETSC_FALSE; 2997 2998 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2999 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3000 if (matin->hash_active) { 3001 PetscCall(MatSetUp(mat)); 3002 } else { 3003 
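    /* the source matrix is already preallocated: copy its column map and ghost column list (garray),
       reference the scatter context, and duplicate the local work vector and the sequential A and B blocks */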
mat->preallocated = matin->preallocated; 3004 if (oldmat->colmap) { 3005 #if defined(PETSC_USE_CTABLE) 3006 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3007 #else 3008 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3009 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3010 #endif 3011 } else a->colmap = NULL; 3012 if (oldmat->garray) { 3013 PetscInt len; 3014 len = oldmat->B->cmap->n; 3015 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3016 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3017 } else a->garray = NULL; 3018 3019 /* It may happen MatDuplicate is called with a non-assembled matrix 3020 In fact, MatDuplicate only requires the matrix to be preallocated 3021 This may happen inside a DMCreateMatrix_Shell */ 3022 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3023 if (oldmat->Mvctx) { 3024 a->Mvctx = oldmat->Mvctx; 3025 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3026 } 3027 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3028 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3029 } 3030 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3031 *newmat = mat; 3032 PetscFunctionReturn(PETSC_SUCCESS); 3033 } 3034 3035 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3036 { 3037 PetscBool isbinary, ishdf5; 3038 3039 PetscFunctionBegin; 3040 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3041 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3042 /* force binary viewer to load .info file if it has not yet done so */ 3043 PetscCall(PetscViewerSetUp(viewer)); 3044 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3045 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3046 if (isbinary) { 3047 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3048 } else if (ishdf5) { 3049 #if defined(PETSC_HAVE_HDF5) 3050 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3051 #else 3052 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3053 #endif 3054 } else { 3055 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3056 } 3057 PetscFunctionReturn(PETSC_SUCCESS); 3058 } 3059 3060 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3061 { 3062 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3063 PetscInt *rowidxs, *colidxs; 3064 PetscScalar *matvals; 3065 3066 PetscFunctionBegin; 3067 PetscCall(PetscViewerSetUp(viewer)); 3068 3069 /* read in matrix header */ 3070 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3071 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3072 M = header[1]; 3073 N = header[2]; 3074 nz = header[3]; 3075 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3076 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3077 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3078 3079 /* set block sizes from the viewer's .info file */ 3080 
PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3081 /* set global sizes if not set already */ 3082 if (mat->rmap->N < 0) mat->rmap->N = M; 3083 if (mat->cmap->N < 0) mat->cmap->N = N; 3084 PetscCall(PetscLayoutSetUp(mat->rmap)); 3085 PetscCall(PetscLayoutSetUp(mat->cmap)); 3086 3087 /* check if the matrix sizes are correct */ 3088 PetscCall(MatGetSize(mat, &rows, &cols)); 3089 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3090 3091 /* read in row lengths and build row indices */ 3092 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3093 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3094 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3095 rowidxs[0] = 0; 3096 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3097 if (nz != PETSC_INT_MAX) { 3098 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3099 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3100 } 3101 3102 /* read in column indices and matrix values */ 3103 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3104 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3105 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3106 /* store matrix indices and values */ 3107 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3108 PetscCall(PetscFree(rowidxs)); 3109 PetscCall(PetscFree2(colidxs, matvals)); 3110 PetscFunctionReturn(PETSC_SUCCESS); 3111 } 3112 3113 /* Not scalable because of ISAllGather() unless getting all columns. */ 3114 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3115 { 3116 IS iscol_local; 3117 PetscBool isstride; 3118 PetscMPIInt gisstride = 0; 3119 3120 PetscFunctionBegin; 3121 /* check if we are grabbing all columns*/ 3122 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3123 3124 if (isstride) { 3125 PetscInt start, len, mstart, mlen; 3126 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3127 PetscCall(ISGetLocalSize(iscol, &len)); 3128 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3129 if (mstart == start && mlen - mstart == len) gisstride = 1; 3130 } 3131 3132 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3133 if (gisstride) { 3134 PetscInt N; 3135 PetscCall(MatGetSize(mat, NULL, &N)); 3136 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3137 PetscCall(ISSetIdentity(iscol_local)); 3138 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3139 } else { 3140 PetscInt cbs; 3141 PetscCall(ISGetBlockSize(iscol, &cbs)); 3142 PetscCall(ISAllGather(iscol, &iscol_local)); 3143 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3144 } 3145 3146 *isseq = iscol_local; 3147 PetscFunctionReturn(PETSC_SUCCESS); 3148 } 3149 3150 /* 3151 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3152 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3153 3154 Input Parameters: 3155 + mat - matrix 3156 . 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3157 i.e., mat->rstart <= isrow[i] < mat->rend 3158 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3159 i.e., mat->cstart <= iscol[i] < mat->cend 3160 3161 Output Parameters: 3162 + isrow_d - sequential row index set for retrieving mat->A 3163 . iscol_d - sequential column index set for retrieving mat->A 3164 . iscol_o - sequential column index set for retrieving mat->B 3165 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3166 */ 3167 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3168 { 3169 Vec x, cmap; 3170 const PetscInt *is_idx; 3171 PetscScalar *xarray, *cmaparray; 3172 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3173 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3174 Mat B = a->B; 3175 Vec lvec = a->lvec, lcmap; 3176 PetscInt i, cstart, cend, Bn = B->cmap->N; 3177 MPI_Comm comm; 3178 VecScatter Mvctx = a->Mvctx; 3179 3180 PetscFunctionBegin; 3181 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3182 PetscCall(ISGetLocalSize(iscol, &ncols)); 3183 3184 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3185 PetscCall(MatCreateVecs(mat, &x, NULL)); 3186 PetscCall(VecSet(x, -1.0)); 3187 PetscCall(VecDuplicate(x, &cmap)); 3188 PetscCall(VecSet(cmap, -1.0)); 3189 3190 /* Get start indices */ 3191 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3192 isstart -= ncols; 3193 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3194 3195 PetscCall(ISGetIndices(iscol, &is_idx)); 3196 PetscCall(VecGetArray(x, &xarray)); 3197 PetscCall(VecGetArray(cmap, &cmaparray)); 3198 PetscCall(PetscMalloc1(ncols, &idx)); 3199 for (i = 0; i < ncols; i++) { 3200 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3201 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3202 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3203 } 3204 PetscCall(VecRestoreArray(x, &xarray)); 3205 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3206 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3207 3208 /* Get iscol_d */ 3209 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3210 PetscCall(ISGetBlockSize(iscol, &i)); 3211 PetscCall(ISSetBlockSize(*iscol_d, i)); 3212 3213 /* Get isrow_d */ 3214 PetscCall(ISGetLocalSize(isrow, &m)); 3215 rstart = mat->rmap->rstart; 3216 PetscCall(PetscMalloc1(m, &idx)); 3217 PetscCall(ISGetIndices(isrow, &is_idx)); 3218 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3219 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3220 3221 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3222 PetscCall(ISGetBlockSize(isrow, &i)); 3223 PetscCall(ISSetBlockSize(*isrow_d, i)); 3224 3225 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3226 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3227 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3228 3229 PetscCall(VecDuplicate(lvec, &lcmap)); 3230 3231 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3232 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3233 3234 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3235 /* off-process 
column indices */ 3236 count = 0; 3237 PetscCall(PetscMalloc1(Bn, &idx)); 3238 PetscCall(PetscMalloc1(Bn, &cmap1)); 3239 3240 PetscCall(VecGetArray(lvec, &xarray)); 3241 PetscCall(VecGetArray(lcmap, &cmaparray)); 3242 for (i = 0; i < Bn; i++) { 3243 if (PetscRealPart(xarray[i]) > -1.0) { 3244 idx[count] = i; /* local column index in off-diagonal part B */ 3245 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3246 count++; 3247 } 3248 } 3249 PetscCall(VecRestoreArray(lvec, &xarray)); 3250 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3251 3252 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3253 /* cannot ensure iscol_o has same blocksize as iscol! */ 3254 3255 PetscCall(PetscFree(idx)); 3256 *garray = cmap1; 3257 3258 PetscCall(VecDestroy(&x)); 3259 PetscCall(VecDestroy(&cmap)); 3260 PetscCall(VecDestroy(&lcmap)); 3261 PetscFunctionReturn(PETSC_SUCCESS); 3262 } 3263 3264 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3265 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3266 { 3267 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3268 Mat M = NULL; 3269 MPI_Comm comm; 3270 IS iscol_d, isrow_d, iscol_o; 3271 Mat Asub = NULL, Bsub = NULL; 3272 PetscInt n; 3273 3274 PetscFunctionBegin; 3275 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3276 3277 if (call == MAT_REUSE_MATRIX) { 3278 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3279 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3280 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3281 3282 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3283 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3284 3285 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3286 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3287 3288 /* Update diagonal and off-diagonal portions of submat */ 3289 asub = (Mat_MPIAIJ *)(*submat)->data; 3290 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3291 PetscCall(ISGetLocalSize(iscol_o, &n)); 3292 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3293 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3294 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3295 3296 } else { /* call == MAT_INITIAL_MATRIX) */ 3297 const PetscInt *garray; 3298 PetscInt BsubN; 3299 3300 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
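These sequential index sets address the local diagonal block a->A and the off-diagonal block a->B directly, which is how this path avoids the ISAllGather() used by the nonscalable route.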
*/ 3301 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3302 3303 /* Create local submatrices Asub and Bsub */ 3304 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3305 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3306 3307 /* Create submatrix M */ 3308 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3309 3310 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3311 asub = (Mat_MPIAIJ *)M->data; 3312 3313 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3314 n = asub->B->cmap->N; 3315 if (BsubN > n) { 3316 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3317 const PetscInt *idx; 3318 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3319 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3320 3321 PetscCall(PetscMalloc1(n, &idx_new)); 3322 j = 0; 3323 PetscCall(ISGetIndices(iscol_o, &idx)); 3324 for (i = 0; i < n; i++) { 3325 if (j >= BsubN) break; 3326 while (subgarray[i] > garray[j]) j++; 3327 3328 if (subgarray[i] == garray[j]) { 3329 idx_new[i] = idx[j++]; 3330 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3331 } 3332 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3333 3334 PetscCall(ISDestroy(&iscol_o)); 3335 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3336 3337 } else if (BsubN < n) { 3338 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3339 } 3340 3341 PetscCall(PetscFree(garray)); 3342 *submat = M; 3343 3344 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3345 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3346 PetscCall(ISDestroy(&isrow_d)); 3347 3348 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3349 PetscCall(ISDestroy(&iscol_d)); 3350 3351 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3352 PetscCall(ISDestroy(&iscol_o)); 3353 } 3354 PetscFunctionReturn(PETSC_SUCCESS); 3355 } 3356 3357 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3358 { 3359 IS iscol_local = NULL, isrow_d; 3360 PetscInt csize; 3361 PetscInt n, i, j, start, end; 3362 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3363 MPI_Comm comm; 3364 3365 PetscFunctionBegin; 3366 /* If isrow has same processor distribution as mat, 3367 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3368 if (call == MAT_REUSE_MATRIX) { 3369 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3370 if (isrow_d) { 3371 sameRowDist = PETSC_TRUE; 3372 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3373 } else { 3374 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3375 if (iscol_local) { 3376 sameRowDist = PETSC_TRUE; 3377 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3378 } 3379 } 3380 } else { 3381 /* Check if isrow has same processor distribution as mat */ 3382 sameDist[0] = PETSC_FALSE; 3383 
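/* isrow matches this process's row distribution when all of its local indices fall within the ownership range [start, end); an empty local index set matches trivially */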
PetscCall(ISGetLocalSize(isrow, &n)); 3384 if (!n) { 3385 sameDist[0] = PETSC_TRUE; 3386 } else { 3387 PetscCall(ISGetMinMax(isrow, &i, &j)); 3388 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3389 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3390 } 3391 3392 /* Check if iscol has same processor distribution as mat */ 3393 sameDist[1] = PETSC_FALSE; 3394 PetscCall(ISGetLocalSize(iscol, &n)); 3395 if (!n) { 3396 sameDist[1] = PETSC_TRUE; 3397 } else { 3398 PetscCall(ISGetMinMax(iscol, &i, &j)); 3399 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3400 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3401 } 3402 3403 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3404 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3405 sameRowDist = tsameDist[0]; 3406 } 3407 3408 if (sameRowDist) { 3409 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3410 /* isrow and iscol have same processor distribution as mat */ 3411 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3412 PetscFunctionReturn(PETSC_SUCCESS); 3413 } else { /* sameRowDist */ 3414 /* isrow has same processor distribution as mat */ 3415 if (call == MAT_INITIAL_MATRIX) { 3416 PetscBool sorted; 3417 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3418 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3419 PetscCall(ISGetSize(iscol, &i)); 3420 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3421 3422 PetscCall(ISSorted(iscol_local, &sorted)); 3423 if (sorted) { 3424 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3425 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3426 PetscFunctionReturn(PETSC_SUCCESS); 3427 } 3428 } else { /* call == MAT_REUSE_MATRIX */ 3429 IS iscol_sub; 3430 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3431 if (iscol_sub) { 3432 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3433 PetscFunctionReturn(PETSC_SUCCESS); 3434 } 3435 } 3436 } 3437 } 3438 3439 /* General case: iscol -> iscol_local which has global size of iscol */ 3440 if (call == MAT_REUSE_MATRIX) { 3441 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3442 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3443 } else { 3444 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3445 } 3446 3447 PetscCall(ISGetLocalSize(iscol, &csize)); 3448 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3449 3450 if (call == MAT_INITIAL_MATRIX) { 3451 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3452 PetscCall(ISDestroy(&iscol_local)); 3453 } 3454 PetscFunctionReturn(PETSC_SUCCESS); 3455 } 3456 3457 /*@C 3458 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3459 and "off-diagonal" part of the matrix in CSR format. 3460 3461 Collective 3462 3463 Input Parameters: 3464 + comm - MPI communicator 3465 . A - "diagonal" portion of matrix 3466 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3467 - garray - global index of `B` columns 3468 3469 Output Parameter: 3470 . mat - the matrix, with input `A` as its local diagonal matrix 3471 3472 Level: advanced 3473 3474 Notes: 3475 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3476 3477 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3478 3479 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3480 @*/ 3481 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3482 { 3483 Mat_MPIAIJ *maij; 3484 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3485 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3486 const PetscScalar *oa; 3487 Mat Bnew; 3488 PetscInt m, n, N; 3489 MatType mpi_mat_type; 3490 3491 PetscFunctionBegin; 3492 PetscCall(MatCreate(comm, mat)); 3493 PetscCall(MatGetSize(A, &m, &n)); 3494 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3495 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3496 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3497 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3498 3499 /* Get global columns of mat */ 3500 PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3501 3502 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3503 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3504 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3505 PetscCall(MatSetType(*mat, mpi_mat_type)); 3506 3507 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3508 maij = (Mat_MPIAIJ *)(*mat)->data; 3509 3510 (*mat)->preallocated = PETSC_TRUE; 3511 3512 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3513 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3514 3515 /* Set A as diagonal portion of *mat */ 3516 maij->A = A; 3517 3518 nz = oi[m]; 3519 for (i = 0; i < nz; i++) { 3520 col = oj[i]; 3521 oj[i] = garray[col]; 3522 } 3523 3524 /* Set Bnew as off-diagonal portion of *mat */ 3525 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3526 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3527 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3528 bnew = (Mat_SeqAIJ *)Bnew->data; 3529 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3530 maij->B = Bnew; 3531 3532 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3533 3534 b->free_a = PETSC_FALSE; 3535 b->free_ij = PETSC_FALSE; 3536 PetscCall(MatDestroy(&B)); 3537 3538 bnew->free_a = PETSC_TRUE; 3539 bnew->free_ij = PETSC_TRUE; 3540 3541 /* condense columns of maij->B */ 3542 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3543 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3544 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3545 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3546 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3547 PetscFunctionReturn(PETSC_SUCCESS); 3548 } 3549 3550 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3551 3552 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3553 { 3554 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3555 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3556 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3557 Mat M, Msub, B = a->B; 3558 MatScalar *aa; 3559 Mat_SeqAIJ *aij; 3560 PetscInt *garray = a->garray, *colsub, Ncols; 3561 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3562 IS iscol_sub, iscmap; 3563 const PetscInt *is_idx, *cmap; 3564 PetscBool allcolumns = PETSC_FALSE; 3565 MPI_Comm comm; 3566 3567 PetscFunctionBegin; 3568 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3569 if (call == MAT_REUSE_MATRIX) { 3570 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3571 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3572 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3573 3574 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3575 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3576 3577 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3578 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3579 3580 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3581 3582 } else { /* call == MAT_INITIAL_MATRIX) */ 3583 PetscBool 
flg; 3584 3585 PetscCall(ISGetLocalSize(iscol, &n)); 3586 PetscCall(ISGetSize(iscol, &Ncols)); 3587 3588 /* (1) iscol -> nonscalable iscol_local */ 3589 /* Check for special case: each processor gets entire matrix columns */ 3590 PetscCall(ISIdentity(iscol_local, &flg)); 3591 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3592 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3593 if (allcolumns) { 3594 iscol_sub = iscol_local; 3595 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3596 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3597 3598 } else { 3599 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3600 PetscInt *idx, *cmap1, k; 3601 PetscCall(PetscMalloc1(Ncols, &idx)); 3602 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3603 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3604 count = 0; 3605 k = 0; 3606 for (i = 0; i < Ncols; i++) { 3607 j = is_idx[i]; 3608 if (j >= cstart && j < cend) { 3609 /* diagonal part of mat */ 3610 idx[count] = j; 3611 cmap1[count++] = i; /* column index in submat */ 3612 } else if (Bn) { 3613 /* off-diagonal part of mat */ 3614 if (j == garray[k]) { 3615 idx[count] = j; 3616 cmap1[count++] = i; /* column index in submat */ 3617 } else if (j > garray[k]) { 3618 while (j > garray[k] && k < Bn - 1) k++; 3619 if (j == garray[k]) { 3620 idx[count] = j; 3621 cmap1[count++] = i; /* column index in submat */ 3622 } 3623 } 3624 } 3625 } 3626 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3627 3628 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3629 PetscCall(ISGetBlockSize(iscol, &cbs)); 3630 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3631 3632 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3633 } 3634 3635 /* (3) Create sequential Msub */ 3636 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3637 } 3638 3639 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3640 aij = (Mat_SeqAIJ *)Msub->data; 3641 ii = aij->i; 3642 PetscCall(ISGetIndices(iscmap, &cmap)); 3643 3644 /* 3645 m - number of local rows 3646 Ncols - number of columns (same on all processors) 3647 rstart - first row in new global matrix generated 3648 */ 3649 PetscCall(MatGetSize(Msub, &m, NULL)); 3650 3651 if (call == MAT_INITIAL_MATRIX) { 3652 /* (4) Create parallel newmat */ 3653 PetscMPIInt rank, size; 3654 PetscInt csize; 3655 3656 PetscCallMPI(MPI_Comm_size(comm, &size)); 3657 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3658 3659 /* 3660 Determine the number of non-zeros in the diagonal and off-diagonal 3661 portions of the matrix in order to do correct preallocation 3662 */ 3663 3664 /* first get start and end of "diagonal" columns */ 3665 PetscCall(ISGetLocalSize(iscol, &csize)); 3666 if (csize == PETSC_DECIDE) { 3667 PetscCall(ISGetSize(isrow, &mglobal)); 3668 if (mglobal == Ncols) { /* square matrix */ 3669 nlocal = m; 3670 } else { 3671 nlocal = Ncols / size + ((Ncols % size) > rank); 3672 } 3673 } else { 3674 nlocal = csize; 3675 } 3676 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3677 rstart = rend - nlocal; 3678 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3679 3680 /* next, 
compute all the lengths */ 3681 jj = aij->j; 3682 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3683 olens = dlens + m; 3684 for (i = 0; i < m; i++) { 3685 jend = ii[i + 1] - ii[i]; 3686 olen = 0; 3687 dlen = 0; 3688 for (j = 0; j < jend; j++) { 3689 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3690 else dlen++; 3691 jj++; 3692 } 3693 olens[i] = olen; 3694 dlens[i] = dlen; 3695 } 3696 3697 PetscCall(ISGetBlockSize(isrow, &bs)); 3698 PetscCall(ISGetBlockSize(iscol, &cbs)); 3699 3700 PetscCall(MatCreate(comm, &M)); 3701 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3702 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3703 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3704 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3705 PetscCall(PetscFree(dlens)); 3706 3707 } else { /* call == MAT_REUSE_MATRIX */ 3708 M = *newmat; 3709 PetscCall(MatGetLocalSize(M, &i, NULL)); 3710 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3711 PetscCall(MatZeroEntries(M)); 3712 /* 3713 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3714 rather than the slower MatSetValues(). 3715 */ 3716 M->was_assembled = PETSC_TRUE; 3717 M->assembled = PETSC_FALSE; 3718 } 3719 3720 /* (5) Set values of Msub to *newmat */ 3721 PetscCall(PetscMalloc1(count, &colsub)); 3722 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3723 3724 jj = aij->j; 3725 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3726 for (i = 0; i < m; i++) { 3727 row = rstart + i; 3728 nz = ii[i + 1] - ii[i]; 3729 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3730 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3731 jj += nz; 3732 aa += nz; 3733 } 3734 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3735 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3736 3737 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3738 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3739 3740 PetscCall(PetscFree(colsub)); 3741 3742 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3743 if (call == MAT_INITIAL_MATRIX) { 3744 *newmat = M; 3745 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3746 PetscCall(MatDestroy(&Msub)); 3747 3748 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3749 PetscCall(ISDestroy(&iscol_sub)); 3750 3751 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3752 PetscCall(ISDestroy(&iscmap)); 3753 3754 if (iscol_local) { 3755 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3756 PetscCall(ISDestroy(&iscol_local)); 3757 } 3758 } 3759 PetscFunctionReturn(PETSC_SUCCESS); 3760 } 3761 3762 /* 3763 Not great since it makes two copies of the submatrix, first an SeqAIJ 3764 in local and then by concatenating the local matrices the end result. 3765 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3766 3767 This requires a sequential iscol with all indices. 
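Each process therefore holds every requested column index, which is why this routine is not scalable in the number of columns.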
3768 */ 3769 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3770 { 3771 PetscMPIInt rank, size; 3772 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3773 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3774 Mat M, Mreuse; 3775 MatScalar *aa, *vwork; 3776 MPI_Comm comm; 3777 Mat_SeqAIJ *aij; 3778 PetscBool colflag, allcolumns = PETSC_FALSE; 3779 3780 PetscFunctionBegin; 3781 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3782 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3783 PetscCallMPI(MPI_Comm_size(comm, &size)); 3784 3785 /* Check for special case: each processor gets entire matrix columns */ 3786 PetscCall(ISIdentity(iscol, &colflag)); 3787 PetscCall(ISGetLocalSize(iscol, &n)); 3788 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3789 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3790 3791 if (call == MAT_REUSE_MATRIX) { 3792 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3793 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3794 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3795 } else { 3796 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3797 } 3798 3799 /* 3800 m - number of local rows 3801 n - number of columns (same on all processors) 3802 rstart - first row in new global matrix generated 3803 */ 3804 PetscCall(MatGetSize(Mreuse, &m, &n)); 3805 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3806 if (call == MAT_INITIAL_MATRIX) { 3807 aij = (Mat_SeqAIJ *)Mreuse->data; 3808 ii = aij->i; 3809 jj = aij->j; 3810 3811 /* 3812 Determine the number of non-zeros in the diagonal and off-diagonal 3813 portions of the matrix in order to do correct preallocation 3814 */ 3815 3816 /* first get start and end of "diagonal" columns */ 3817 if (csize == PETSC_DECIDE) { 3818 PetscCall(ISGetSize(isrow, &mglobal)); 3819 if (mglobal == n) { /* square matrix */ 3820 nlocal = m; 3821 } else { 3822 nlocal = n / size + ((n % size) > rank); 3823 } 3824 } else { 3825 nlocal = csize; 3826 } 3827 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3828 rstart = rend - nlocal; 3829 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3830 3831 /* next, compute all the lengths */ 3832 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3833 olens = dlens + m; 3834 for (i = 0; i < m; i++) { 3835 jend = ii[i + 1] - ii[i]; 3836 olen = 0; 3837 dlen = 0; 3838 for (j = 0; j < jend; j++) { 3839 if (*jj < rstart || *jj >= rend) olen++; 3840 else dlen++; 3841 jj++; 3842 } 3843 olens[i] = olen; 3844 dlens[i] = dlen; 3845 } 3846 PetscCall(MatCreate(comm, &M)); 3847 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3848 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3849 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3850 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3851 PetscCall(PetscFree(dlens)); 3852 } else { 3853 PetscInt ml, nl; 3854 3855 M = *newmat; 3856 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3857 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3858 PetscCall(MatZeroEntries(M)); 3859 /* 3860 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3861 rather than the slower MatSetValues(). 3862 */ 3863 M->was_assembled = PETSC_TRUE; 3864 M->assembled = PETSC_FALSE; 3865 } 3866 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3867 aij = (Mat_SeqAIJ *)Mreuse->data; 3868 ii = aij->i; 3869 jj = aij->j; 3870 3871 /* trigger copy to CPU if needed */ 3872 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3873 for (i = 0; i < m; i++) { 3874 row = rstart + i; 3875 nz = ii[i + 1] - ii[i]; 3876 cwork = jj; 3877 jj = PetscSafePointerPlusOffset(jj, nz); 3878 vwork = aa; 3879 aa = PetscSafePointerPlusOffset(aa, nz); 3880 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3881 } 3882 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3883 3884 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3885 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3886 *newmat = M; 3887 3888 /* save submatrix used in processor for next request */ 3889 if (call == MAT_INITIAL_MATRIX) { 3890 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3891 PetscCall(MatDestroy(&Mreuse)); 3892 } 3893 PetscFunctionReturn(PETSC_SUCCESS); 3894 } 3895 3896 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3897 { 3898 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3899 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3900 const PetscInt *JJ; 3901 PetscBool nooffprocentries; 3902 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3903 3904 PetscFunctionBegin; 3905 PetscCall(PetscLayoutSetUp(B->rmap)); 3906 PetscCall(PetscLayoutSetUp(B->cmap)); 3907 m = B->rmap->n; 3908 cstart = B->cmap->rstart; 3909 cend = B->cmap->rend; 3910 rstart = B->rmap->rstart; 3911 irstart = Ii[0]; 3912 3913 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3914 3915 if (PetscDefined(USE_DEBUG)) { 3916 for (i = 0; i < m; i++) { 3917 nnz = Ii[i + 1] - Ii[i]; 3918 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3919 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3920 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3921 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3922 } 3923 } 3924 3925 for (i = 0; i < m; i++) { 3926 nnz = Ii[i + 1] - Ii[i]; 3927 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3928 nnz_max = PetscMax(nnz_max, nnz); 3929 d = 0; 3930 for (j = 0; j < nnz; j++) { 3931 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3932 } 3933 d_nnz[i] = d; 3934 o_nnz[i] = nnz - d; 3935 } 3936 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3937 PetscCall(PetscFree2(d_nnz, o_nnz)); 3938 3939 for (i = 0; i < m; i++) { 3940 ii = i + rstart; 3941 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3942 } 3943 nooffprocentries = B->nooffprocentries; 3944 B->nooffprocentries = PETSC_TRUE; 3945 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3946 
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3947 B->nooffprocentries = nooffprocentries; 3948 3949 /* count number of entries below block diagonal */ 3950 PetscCall(PetscFree(Aij->ld)); 3951 PetscCall(PetscCalloc1(m, &ld)); 3952 Aij->ld = ld; 3953 for (i = 0; i < m; i++) { 3954 nnz = Ii[i + 1] - Ii[i]; 3955 j = 0; 3956 while (j < nnz && J[j] < cstart) j++; 3957 ld[i] = j; 3958 if (J) J += nnz; 3959 } 3960 3961 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3962 PetscFunctionReturn(PETSC_SUCCESS); 3963 } 3964 3965 /*@ 3966 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3967 (the default parallel PETSc format). 3968 3969 Collective 3970 3971 Input Parameters: 3972 + B - the matrix 3973 . i - the indices into `j` for the start of each local row (indices start with zero) 3974 . j - the column indices for each local row (indices start with zero) 3975 - v - optional values in the matrix 3976 3977 Level: developer 3978 3979 Notes: 3980 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3981 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3982 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3983 3984 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3985 3986 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3987 3988 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3989 3990 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3991 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3992 3993 The format which is used for the sparse matrix input, is equivalent to a 3994 row-major ordering.. i.e for the following matrix, the input data expected is 3995 as shown 3996 .vb 3997 1 0 0 3998 2 0 3 P0 3999 ------- 4000 4 5 6 P1 4001 4002 Process0 [P0] rows_owned=[0,1] 4003 i = {0,1,3} [size = nrow+1 = 2+1] 4004 j = {0,0,2} [size = 3] 4005 v = {1,2,3} [size = 3] 4006 4007 Process1 [P1] rows_owned=[2] 4008 i = {0,3} [size = nrow+1 = 1+1] 4009 j = {0,1,2} [size = 3] 4010 v = {4,5,6} [size = 3] 4011 .ve 4012 4013 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4014 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4015 @*/ 4016 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4017 { 4018 PetscFunctionBegin; 4019 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4020 PetscFunctionReturn(PETSC_SUCCESS); 4021 } 4022 4023 /*@ 4024 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4025 (the default parallel PETSc format). For good matrix assembly performance 4026 the user should preallocate the matrix storage by setting the parameters 4027 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4028 4029 Collective 4030 4031 Input Parameters: 4032 + B - the matrix 4033 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4034 (same value is used for all local rows) 4035 . d_nnz - array containing the number of nonzeros in the various rows of the 4036 DIAGONAL portion of the local submatrix (possibly different for each row) 4037 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4038 The size of this array is equal to the number of local rows, i.e., 'm'. 4039 For matrices that will be factored, you must leave room for (and set) 4040 the diagonal entry even if it is zero. 4041 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4042 submatrix (same value is used for all local rows). 4043 - o_nnz - array containing the number of nonzeros in the various rows of the 4044 OFF-DIAGONAL portion of the local submatrix (possibly different for 4045 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4046 structure. The size of this array is equal to the number 4047 of local rows, i.e., 'm'. 4048 4049 Example Usage: 4050 Consider the following 8x8 matrix with 34 non-zero values that is 4051 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4052 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4053 as follows 4054 4055 .vb 4056 1 2 0 | 0 3 0 | 0 4 4057 Proc0 0 5 6 | 7 0 0 | 8 0 4058 9 0 10 | 11 0 0 | 12 0 4059 ------------------------------------- 4060 13 0 14 | 15 16 17 | 0 0 4061 Proc1 0 18 0 | 19 20 21 | 0 0 4062 0 0 0 | 22 23 0 | 24 0 4063 ------------------------------------- 4064 Proc2 25 26 27 | 0 0 28 | 29 0 4065 30 0 0 | 31 32 33 | 0 34 4066 .ve 4067 4068 This can be represented as a collection of submatrices as 4069 .vb 4070 A B C 4071 D E F 4072 G H I 4073 .ve 4074 4075 Here the submatrices A,B,C are owned by proc0, D,E,F are 4076 owned by proc1, and G,H,I are owned by proc2. 4077 4078 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4079 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4080 The 'M','N' parameters are 8,8, and have the same values on all procs. 4081 4082 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4083 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4084 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4085 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4086 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4087 matrix, and [DF] as another `MATSEQAIJ` matrix. 4088 4089 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4090 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4091 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4092 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4093 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4094 In this case, the values of `d_nz`, `o_nz` are 4095 .vb 4096 proc0 d_nz = 2, o_nz = 2 4097 proc1 d_nz = 3, o_nz = 2 4098 proc2 d_nz = 1, o_nz = 4 4099 .ve 4100 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4101 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4102 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4103 34 values. 4104 4105 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4106 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4107 In the above case the values for `d_nnz`, `o_nnz` are 4108 .vb 4109 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4110 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4111 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4112 .ve 4113 Here the space allocated is the sum of all the above values, i.e., 34, and 4114 hence the preallocation is perfect. 4115 4116 Level: intermediate 4117 4118 Notes: 4119 If the *_nnz parameter is given then the *_nz parameter is ignored. 4120 4121 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4122 storage. The stored row and column indices begin with zero. 4123 See [Sparse Matrices](sec_matsparse) for details. 4124 4125 The parallel matrix is partitioned such that the first m0 rows belong to 4126 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4127 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 4128 4129 The DIAGONAL portion of the local submatrix of a processor can be defined 4130 as the submatrix which is obtained by extracting the part corresponding to 4131 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4132 first row that belongs to the processor, r2 is the last row belonging to 4133 this processor, and c1-c2 is the range of indices of the local part of a 4134 vector suitable for applying the matrix to. This is an m x n matrix. In the 4135 common case of a square matrix, the row and column ranges are the same and 4136 the DIAGONAL part is also square. The remaining portion of the local 4137 submatrix, of size m x (N-n), constitutes the OFF-DIAGONAL portion. 4138 4139 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4140 4141 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4142 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4143 You can also run with the option `-info` and look for messages with the string 4144 malloc in them to see if additional memory allocation was needed. 4145 4146 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4147 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4148 @*/ 4149 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4150 { 4151 PetscFunctionBegin; 4152 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4153 PetscValidType(B, 1); 4154 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4155 PetscFunctionReturn(PETSC_SUCCESS); 4156 } 4157 4158 /*@ 4159 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4160 CSR format. 4161 4162 Collective 4163 4164 Input Parameters: 4165 + comm - MPI communicator 4166 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4167 . n - This value should be the same as the local size used in creating the 4168 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4169 calculated if `N` is given) For square matrices n is almost always `m`. 4170 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4171 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4172 .
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4173 . j - global column indices 4174 - a - optional matrix values 4175 4176 Output Parameter: 4177 . mat - the matrix 4178 4179 Level: intermediate 4180 4181 Notes: 4182 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4183 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4184 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4185 4186 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4187 4188 Once you have created the matrix, you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`. 4189 4190 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4191 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4192 4193 The format used for the sparse matrix input is equivalent to a 4194 row-major ordering, i.e., for the following matrix, the expected input data is 4195 as shown 4196 .vb 4197 1 0 0 4198 2 0 3 P0 4199 ------- 4200 4 5 6 P1 4201 4202 Process0 [P0] rows_owned=[0,1] 4203 i = {0,1,3} [size = nrow+1 = 2+1] 4204 j = {0,0,2} [size = 3] 4205 v = {1,2,3} [size = 3] 4206 4207 Process1 [P1] rows_owned=[2] 4208 i = {0,3} [size = nrow+1 = 1+1] 4209 j = {0,1,2} [size = 3] 4210 v = {4,5,6} [size = 3] 4211 .ve 4212 4213 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4214 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4215 @*/ 4216 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4217 { 4218 PetscFunctionBegin; 4219 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4220 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4221 PetscCall(MatCreate(comm, mat)); 4222 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4223 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4224 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4225 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4226 PetscFunctionReturn(PETSC_SUCCESS); 4227 } 4228 4229 /*@ 4230 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4231 CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed 4232 to `MatCreateMPIAIJWithArrays()` 4233 4234 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4235 4236 Collective 4237 4238 Input Parameters: 4239 + mat - the matrix 4240 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4241 . n - This value should be the same as the local size used in creating the 4242 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4243 calculated if N is given) For square matrices n is almost always m. 4244 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4245 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4246 .
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4247 . J - column indices 4248 - v - matrix values 4249 4250 Level: deprecated 4251 4252 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4253 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4254 @*/ 4255 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4256 { 4257 PetscInt nnz, i; 4258 PetscBool nooffprocentries; 4259 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4260 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4261 PetscScalar *ad, *ao; 4262 PetscInt ldi, Iii, md; 4263 const PetscInt *Adi = Ad->i; 4264 PetscInt *ld = Aij->ld; 4265 4266 PetscFunctionBegin; 4267 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4268 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4269 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4270 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4271 4272 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4273 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4274 4275 for (i = 0; i < m; i++) { 4276 if (PetscDefined(USE_DEBUG)) { 4277 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4278 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4279 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4280 } 4281 } 4282 nnz = Ii[i + 1] - Ii[i]; 4283 Iii = Ii[i]; 4284 ldi = ld[i]; 4285 md = Adi[i + 1] - Adi[i]; 4286 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4287 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4288 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4289 ad += md; 4290 ao += nnz - md; 4291 } 4292 nooffprocentries = mat->nooffprocentries; 4293 mat->nooffprocentries = PETSC_TRUE; 4294 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4295 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4296 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4297 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4298 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4299 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4300 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4301 mat->nooffprocentries = nooffprocentries; 4302 PetscFunctionReturn(PETSC_SUCCESS); 4303 } 4304 4305 /*@ 4306 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4307 4308 Collective 4309 4310 Input Parameters: 4311 + mat - the matrix 4312 - v - matrix values, stored by row 4313 4314 Level: intermediate 4315 4316 Notes: 4317 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4318 4319 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4320 4321 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4322 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4323 @*/ 4324 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4325 { 4326 PetscInt nnz, i, m; 4327 PetscBool nooffprocentries; 4328 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4329 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4330 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4331 PetscScalar *ad, *ao; 4332 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4333 PetscInt ldi, Iii, md; 4334 PetscInt *ld = Aij->ld; 4335 4336 PetscFunctionBegin; 4337 m = mat->rmap->n; 4338 4339 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4340 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4341 Iii = 0; 4342 for (i = 0; i < m; i++) { 4343 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4344 ldi = ld[i]; 4345 md = Adi[i + 1] - Adi[i]; 4346 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4347 ad += md; 4348 if (ao) { 4349 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4350 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4351 ao += nnz - md; 4352 } 4353 Iii += nnz; 4354 } 4355 nooffprocentries = mat->nooffprocentries; 4356 mat->nooffprocentries = PETSC_TRUE; 4357 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4358 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4359 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4360 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4361 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4362 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4363 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4364 mat->nooffprocentries = nooffprocentries; 4365 PetscFunctionReturn(PETSC_SUCCESS); 4366 } 4367 4368 /*@ 4369 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4370 (the default parallel PETSc format). For good matrix assembly performance 4371 the user should preallocate the matrix storage by setting the parameters 4372 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4373 4374 Collective 4375 4376 Input Parameters: 4377 + comm - MPI communicator 4378 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4379 This value should be the same as the local size used in creating the 4380 y vector for the matrix-vector product y = Ax. 4381 . n - This value should be the same as the local size used in creating the 4382 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4383 calculated if N is given) For square matrices n is almost always m. 4384 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4385 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4386 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4387 (same value is used for all local rows) 4388 . d_nnz - array containing the number of nonzeros in the various rows of the 4389 DIAGONAL portion of the local submatrix (possibly different for each row) 4390 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4391 The size of this array is equal to the number of local rows, i.e 'm'. 4392 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4393 submatrix (same value is used for all local rows). 4394 - o_nnz - array containing the number of nonzeros in the various rows of the 4395 OFF-DIAGONAL portion of the local submatrix (possibly different for 4396 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4397 structure. The size of this array is equal to the number 4398 of local rows, i.e 'm'. 4399 4400 Output Parameter: 4401 . A - the matrix 4402 4403 Options Database Keys: 4404 + -mat_no_inode - Do not use inodes 4405 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4406 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4407 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4408 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4409 4410 Level: intermediate 4411 4412 Notes: 4413 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4414 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4415 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4416 4417 If the *_nnz parameter is given then the *_nz parameter is ignored 4418 4419 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4420 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4421 storage requirements for this matrix. 4422 4423 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4424 processor than it must be used on all processors that share the object for 4425 that argument. 4426 4427 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4428 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4429 4430 The user MUST specify either the local or global matrix dimensions 4431 (possibly both). 4432 4433 The parallel matrix is partitioned across processors such that the 4434 first `m0` rows belong to process 0, the next `m1` rows belong to 4435 process 1, the next `m2` rows belong to process 2, etc., where 4436 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4437 values corresponding to [m x N] submatrix. 4438 4439 The columns are logically partitioned with the n0 columns belonging 4440 to 0th partition, the next n1 columns belonging to the next 4441 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4442 4443 The DIAGONAL portion of the local submatrix on any given processor 4444 is the submatrix corresponding to the rows and columns m,n 4445 corresponding to the given processor. i.e diagonal matrix on 4446 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4447 etc. The remaining portion of the local submatrix [m x (N-n)] 4448 constitute the OFF-DIAGONAL portion. The example below better 4449 illustrates this concept. The two matrices, the DIAGONAL portion and 4450 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 
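   The two stored `MATSEQAIJ` blocks can be retrieved directly with `MatMPIAIJGetSeqAIJ()`; a minimal sketch (assuming `A` is an already assembled `MATMPIAIJ` matrix) is
.vb
     Mat             Ad, Ao;  /* diagonal and off-diagonal blocks, still owned by A */
     const PetscInt *colmap;  /* maps local columns of Ao to global columns of A    */
     PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap));
     /* ... inspect Ad and Ao here; do not destroy them, they remain owned by A ... */
.ve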
4451 4452 For a square global matrix we define each processor's diagonal portion 4453 to be its local rows and the corresponding columns (a square submatrix); 4454 each processor's off-diagonal portion encompasses the remainder of the 4455 local matrix (a rectangular submatrix). 4456 4457 If `o_nnz`, `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4458 4459 When calling this routine with a single process communicator, a matrix of 4460 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4461 type of communicator, use the construction mechanism 4462 .vb 4463 MatCreate(..., &A); 4464 MatSetType(A, MATMPIAIJ); 4465 MatSetSizes(A, m, n, M, N); 4466 MatMPIAIJSetPreallocation(A, ...); 4467 .ve 4468 4469 By default, this format uses inodes (identical nodes) when possible. 4470 We search for consecutive rows with the same nonzero structure, thereby 4471 reusing matrix information to achieve increased efficiency. 4472 4473 Example Usage: 4474 Consider the following 8x8 matrix with 34 nonzero values that is 4475 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4476 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4477 as follows 4478 4479 .vb 4480 1 2 0 | 0 3 0 | 0 4 4481 Proc0 0 5 6 | 7 0 0 | 8 0 4482 9 0 10 | 11 0 0 | 12 0 4483 ------------------------------------- 4484 13 0 14 | 15 16 17 | 0 0 4485 Proc1 0 18 0 | 19 20 21 | 0 0 4486 0 0 0 | 22 23 0 | 24 0 4487 ------------------------------------- 4488 Proc2 25 26 27 | 0 0 28 | 29 0 4489 30 0 0 | 31 32 33 | 0 34 4490 .ve 4491 4492 This can be represented as a collection of submatrices as 4493 4494 .vb 4495 A B C 4496 D E F 4497 G H I 4498 .ve 4499 4500 where the submatrices A,B,C are owned by proc0, D,E,F are 4501 owned by proc1, and G,H,I are owned by proc2. 4502 4503 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4504 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4505 The 'M','N' parameters are 8,8, and have the same values on all procs. 4506 4507 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4508 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4509 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4510 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4511 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4512 matrix, and [DF] as another SeqAIJ matrix. 4513 4514 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4515 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4516 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4517 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4518 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4519 In this case, the values of `d_nz`,`o_nz` are 4520 .vb 4521 proc0 d_nz = 2, o_nz = 2 4522 proc1 d_nz = 3, o_nz = 2 4523 proc2 d_nz = 1, o_nz = 4 4524 .ve 4525 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4526 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4527 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4528 34 values. 4529 4530 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4531 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4532 In the above case the values for d_nnz,o_nnz are 4533 .vb 4534 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4535 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4536 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4537 .ve 4538 Here the space allocated is sum of all the above values i.e 34, and 4539 hence pre-allocation is perfect. 4540 4541 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4542 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4543 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4544 @*/ 4545 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4546 { 4547 PetscMPIInt size; 4548 4549 PetscFunctionBegin; 4550 PetscCall(MatCreate(comm, A)); 4551 PetscCall(MatSetSizes(*A, m, n, M, N)); 4552 PetscCallMPI(MPI_Comm_size(comm, &size)); 4553 if (size > 1) { 4554 PetscCall(MatSetType(*A, MATMPIAIJ)); 4555 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4556 } else { 4557 PetscCall(MatSetType(*A, MATSEQAIJ)); 4558 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4559 } 4560 PetscFunctionReturn(PETSC_SUCCESS); 4561 } 4562 4563 /*MC 4564 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4565 4566 Synopsis: 4567 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4568 4569 Not Collective 4570 4571 Input Parameter: 4572 . A - the `MATMPIAIJ` matrix 4573 4574 Output Parameters: 4575 + Ad - the diagonal portion of the matrix 4576 . Ao - the off-diagonal portion of the matrix 4577 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4578 - ierr - error code 4579 4580 Level: advanced 4581 4582 Note: 4583 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4584 4585 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4586 M*/ 4587 4588 /*MC 4589 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4590 4591 Synopsis: 4592 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4593 4594 Not Collective 4595 4596 Input Parameters: 4597 + A - the `MATMPIAIJ` matrix 4598 . Ad - the diagonal portion of the matrix 4599 . Ao - the off-diagonal portion of the matrix 4600 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4601 - ierr - error code 4602 4603 Level: advanced 4604 4605 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4606 M*/ 4607 4608 /*@C 4609 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4610 4611 Not Collective 4612 4613 Input Parameter: 4614 . A - The `MATMPIAIJ` matrix 4615 4616 Output Parameters: 4617 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4618 . 
Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4619 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4620 4621 Level: intermediate 4622 4623 Note: 4624 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4625 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4626 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4627 local column numbers to global column numbers in the original matrix. 4628 4629 Fortran Notes: 4630 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4631 4632 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4633 @*/ 4634 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4635 { 4636 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4637 PetscBool flg; 4638 4639 PetscFunctionBegin; 4640 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4641 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4642 if (Ad) *Ad = a->A; 4643 if (Ao) *Ao = a->B; 4644 if (colmap) *colmap = a->garray; 4645 PetscFunctionReturn(PETSC_SUCCESS); 4646 } 4647 4648 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4649 { 4650 PetscInt m, N, i, rstart, nnz, Ii; 4651 PetscInt *indx; 4652 PetscScalar *values; 4653 MatType rootType; 4654 4655 PetscFunctionBegin; 4656 PetscCall(MatGetSize(inmat, &m, &N)); 4657 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4658 PetscInt *dnz, *onz, sum, bs, cbs; 4659 4660 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4661 /* Check sum(n) = N */ 4662 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4663 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4664 4665 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4666 rstart -= m; 4667 4668 MatPreallocateBegin(comm, m, n, dnz, onz); 4669 for (i = 0; i < m; i++) { 4670 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4671 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4672 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4673 } 4674 4675 PetscCall(MatCreate(comm, outmat)); 4676 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4677 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4678 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4679 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4680 PetscCall(MatSetType(*outmat, rootType)); 4681 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4682 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4683 MatPreallocateEnd(dnz, onz); 4684 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4685 } 4686 4687 /* numeric phase */ 4688 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4689 for (i = 0; i < m; i++) { 4690 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4691 Ii = i + rstart; 4692 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4693 
PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4694 } 4695 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4696 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4697 PetscFunctionReturn(PETSC_SUCCESS); 4698 } 4699 4700 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4701 { 4702 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4703 4704 PetscFunctionBegin; 4705 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4706 PetscCall(PetscFree(merge->id_r)); 4707 PetscCall(PetscFree(merge->len_s)); 4708 PetscCall(PetscFree(merge->len_r)); 4709 PetscCall(PetscFree(merge->bi)); 4710 PetscCall(PetscFree(merge->bj)); 4711 PetscCall(PetscFree(merge->buf_ri[0])); 4712 PetscCall(PetscFree(merge->buf_ri)); 4713 PetscCall(PetscFree(merge->buf_rj[0])); 4714 PetscCall(PetscFree(merge->buf_rj)); 4715 PetscCall(PetscFree(merge->coi)); 4716 PetscCall(PetscFree(merge->coj)); 4717 PetscCall(PetscFree(merge->owners_co)); 4718 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4719 PetscCall(PetscFree(merge)); 4720 PetscFunctionReturn(PETSC_SUCCESS); 4721 } 4722 4723 #include <../src/mat/utils/freespace.h> 4724 #include <petscbt.h> 4725 4726 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4727 { 4728 MPI_Comm comm; 4729 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4730 PetscMPIInt size, rank, taga, *len_s; 4731 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4732 PetscMPIInt proc, k; 4733 PetscInt **buf_ri, **buf_rj; 4734 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4735 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4736 MPI_Request *s_waits, *r_waits; 4737 MPI_Status *status; 4738 const MatScalar *aa, *a_a; 4739 MatScalar **abuf_r, *ba_i; 4740 Mat_Merge_SeqsToMPI *merge; 4741 PetscContainer container; 4742 4743 PetscFunctionBegin; 4744 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4745 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4746 4747 PetscCallMPI(MPI_Comm_size(comm, &size)); 4748 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4749 4750 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4751 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4752 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4753 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4754 aa = a_a; 4755 4756 bi = merge->bi; 4757 bj = merge->bj; 4758 buf_ri = merge->buf_ri; 4759 buf_rj = merge->buf_rj; 4760 4761 PetscCall(PetscMalloc1(size, &status)); 4762 owners = merge->rowmap->range; 4763 len_s = merge->len_s; 4764 4765 /* send and recv matrix values */ 4766 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4767 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4768 4769 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4770 for (proc = 0, k = 0; proc < size; proc++) { 4771 if (!len_s[proc]) continue; 4772 i = owners[proc]; 4773 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4774 k++; 4775 } 4776 4777 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4778 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4779 PetscCall(PetscFree(status)); 4780 4781 PetscCall(PetscFree(s_waits)); 4782 PetscCall(PetscFree(r_waits)); 4783 4784 /* insert mat values of mpimat */ 4785 PetscCall(PetscMalloc1(N, &ba_i)); 4786 
PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4787 4788 for (k = 0; k < merge->nrecv; k++) { 4789 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4790 nrows = *buf_ri_k[k]; 4791 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4792 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4793 } 4794 4795 /* set values of ba */ 4796 m = merge->rowmap->n; 4797 for (i = 0; i < m; i++) { 4798 arow = owners[rank] + i; 4799 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4800 bnzi = bi[i + 1] - bi[i]; 4801 PetscCall(PetscArrayzero(ba_i, bnzi)); 4802 4803 /* add local non-zero vals of this proc's seqmat into ba */ 4804 anzi = ai[arow + 1] - ai[arow]; 4805 aj = a->j + ai[arow]; 4806 aa = a_a + ai[arow]; 4807 nextaj = 0; 4808 for (j = 0; nextaj < anzi; j++) { 4809 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4810 ba_i[j] += aa[nextaj++]; 4811 } 4812 } 4813 4814 /* add received vals into ba */ 4815 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4816 /* i-th row */ 4817 if (i == *nextrow[k]) { 4818 anzi = *(nextai[k] + 1) - *nextai[k]; 4819 aj = buf_rj[k] + *nextai[k]; 4820 aa = abuf_r[k] + *nextai[k]; 4821 nextaj = 0; 4822 for (j = 0; nextaj < anzi; j++) { 4823 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4824 ba_i[j] += aa[nextaj++]; 4825 } 4826 } 4827 nextrow[k]++; 4828 nextai[k]++; 4829 } 4830 } 4831 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4832 } 4833 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4834 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4835 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4836 4837 PetscCall(PetscFree(abuf_r[0])); 4838 PetscCall(PetscFree(abuf_r)); 4839 PetscCall(PetscFree(ba_i)); 4840 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4841 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4842 PetscFunctionReturn(PETSC_SUCCESS); 4843 } 4844 4845 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4846 { 4847 Mat B_mpi; 4848 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4849 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4850 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4851 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4852 PetscInt len, *dnz, *onz, bs, cbs; 4853 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4854 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4855 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4856 MPI_Status *status; 4857 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4858 PetscBT lnkbt; 4859 Mat_Merge_SeqsToMPI *merge; 4860 PetscContainer container; 4861 4862 PetscFunctionBegin; 4863 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4864 4865 /* make sure it is a PETSc comm */ 4866 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4867 PetscCallMPI(MPI_Comm_size(comm, &size)); 4868 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4869 4870 PetscCall(PetscNew(&merge)); 4871 PetscCall(PetscMalloc1(size, &status)); 4872 4873 /* determine row ownership */ 4874 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4875 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4876 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4877 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4878 PetscCall(PetscLayoutSetUp(merge->rowmap)); 
4879 PetscCall(PetscMalloc1(size, &len_si)); 4880 PetscCall(PetscMalloc1(size, &merge->len_s)); 4881 4882 m = merge->rowmap->n; 4883 owners = merge->rowmap->range; 4884 4885 /* determine the number of messages to send, their lengths */ 4886 len_s = merge->len_s; 4887 4888 len = 0; /* length of buf_si[] */ 4889 merge->nsend = 0; 4890 for (PetscMPIInt proc = 0; proc < size; proc++) { 4891 len_si[proc] = 0; 4892 if (proc == rank) { 4893 len_s[proc] = 0; 4894 } else { 4895 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4896 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4897 } 4898 if (len_s[proc]) { 4899 merge->nsend++; 4900 nrows = 0; 4901 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4902 if (ai[i + 1] > ai[i]) nrows++; 4903 } 4904 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4905 len += len_si[proc]; 4906 } 4907 } 4908 4909 /* determine the number and length of messages to receive for ij-structure */ 4910 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4911 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4912 4913 /* post the Irecv of j-structure */ 4914 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4915 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4916 4917 /* post the Isend of j-structure */ 4918 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4919 4920 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4921 if (!len_s[proc]) continue; 4922 i = owners[proc]; 4923 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4924 k++; 4925 } 4926 4927 /* receives and sends of j-structure are complete */ 4928 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4929 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4930 4931 /* send and recv i-structure */ 4932 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4933 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4934 4935 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4936 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4937 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4938 if (!len_s[proc]) continue; 4939 /* form outgoing message for i-structure: 4940 buf_si[0]: nrows to be sent 4941 [1:nrows]: row index (global) 4942 [nrows+1:2*nrows+1]: i-structure index 4943 */ 4944 nrows = len_si[proc] / 2 - 1; 4945 buf_si_i = buf_si + nrows + 1; 4946 buf_si[0] = nrows; 4947 buf_si_i[0] = 0; 4948 nrows = 0; 4949 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4950 anzi = ai[i + 1] - ai[i]; 4951 if (anzi) { 4952 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4953 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4954 nrows++; 4955 } 4956 } 4957 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4958 k++; 4959 buf_si += len_si[proc]; 4960 } 4961 4962 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4963 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4964 4965 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4966 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], 
merge->len_r[i], merge->id_r[i])); 4967 4968 PetscCall(PetscFree(len_si)); 4969 PetscCall(PetscFree(len_ri)); 4970 PetscCall(PetscFree(rj_waits)); 4971 PetscCall(PetscFree2(si_waits, sj_waits)); 4972 PetscCall(PetscFree(ri_waits)); 4973 PetscCall(PetscFree(buf_s)); 4974 PetscCall(PetscFree(status)); 4975 4976 /* compute a local seq matrix in each processor */ 4977 /* allocate bi array and free space for accumulating nonzero column info */ 4978 PetscCall(PetscMalloc1(m + 1, &bi)); 4979 bi[0] = 0; 4980 4981 /* create and initialize a linked list */ 4982 nlnk = N + 1; 4983 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4984 4985 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4986 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4987 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4988 4989 current_space = free_space; 4990 4991 /* determine symbolic info for each local row */ 4992 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4993 4994 for (k = 0; k < merge->nrecv; k++) { 4995 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4996 nrows = *buf_ri_k[k]; 4997 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4998 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4999 } 5000 5001 MatPreallocateBegin(comm, m, n, dnz, onz); 5002 len = 0; 5003 for (i = 0; i < m; i++) { 5004 bnzi = 0; 5005 /* add local non-zero cols of this proc's seqmat into lnk */ 5006 arow = owners[rank] + i; 5007 anzi = ai[arow + 1] - ai[arow]; 5008 aj = a->j + ai[arow]; 5009 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5010 bnzi += nlnk; 5011 /* add received col data into lnk */ 5012 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5013 if (i == *nextrow[k]) { /* i-th row */ 5014 anzi = *(nextai[k] + 1) - *nextai[k]; 5015 aj = buf_rj[k] + *nextai[k]; 5016 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5017 bnzi += nlnk; 5018 nextrow[k]++; 5019 nextai[k]++; 5020 } 5021 } 5022 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5023 5024 /* if free space is not available, make more free space */ 5025 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space)); 5026 /* copy data into free space, then initialize lnk */ 5027 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5028 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5029 5030 current_space->array += bnzi; 5031 current_space->local_used += bnzi; 5032 current_space->local_remaining -= bnzi; 5033 5034 bi[i + 1] = bi[i] + bnzi; 5035 } 5036 5037 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5038 5039 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5040 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5041 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5042 5043 /* create symbolic parallel matrix B_mpi */ 5044 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5045 PetscCall(MatCreate(comm, &B_mpi)); 5046 if (n == PETSC_DECIDE) { 5047 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5048 } else { 5049 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5050 } 5051 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5052 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5053 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5054 MatPreallocateEnd(dnz, onz); 5055 PetscCall(MatSetOption(B_mpi,
MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5056 5057 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5058 B_mpi->assembled = PETSC_FALSE; 5059 merge->bi = bi; 5060 merge->bj = bj; 5061 merge->buf_ri = buf_ri; 5062 merge->buf_rj = buf_rj; 5063 merge->coi = NULL; 5064 merge->coj = NULL; 5065 merge->owners_co = NULL; 5066 5067 PetscCall(PetscCommDestroy(&comm)); 5068 5069 /* attach the supporting struct to B_mpi for reuse */ 5070 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5071 PetscCall(PetscContainerSetPointer(container, merge)); 5072 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5073 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5074 PetscCall(PetscContainerDestroy(&container)); 5075 *mpimat = B_mpi; 5076 5077 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5078 PetscFunctionReturn(PETSC_SUCCESS); 5079 } 5080 5081 /*@ 5082 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding the sequential 5083 matrices from each processor 5084 5085 Collective 5086 5087 Input Parameters: 5088 + comm - the communicator the parallel matrix will live on 5089 . seqmat - the input sequential matrix 5090 . m - number of local rows (or `PETSC_DECIDE`) 5091 . n - number of local columns (or `PETSC_DECIDE`) 5092 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5093 5094 Output Parameter: 5095 . mpimat - the parallel matrix generated 5096 5097 Level: advanced 5098 5099 Note: 5100 The dimensions of the sequential matrix on each processor MUST be the same. 5101 The input `seqmat` is included in the container "Mat_Merge_SeqsToMPI", and will be 5102 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5103 5104 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5105 @*/ 5106 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5107 { 5108 PetscMPIInt size; 5109 5110 PetscFunctionBegin; 5111 PetscCallMPI(MPI_Comm_size(comm, &size)); 5112 if (size == 1) { 5113 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5114 if (scall == MAT_INITIAL_MATRIX) { 5115 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5116 } else { 5117 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5118 } 5119 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5120 PetscFunctionReturn(PETSC_SUCCESS); 5121 } 5122 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5123 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5124 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5125 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5126 PetscFunctionReturn(PETSC_SUCCESS); 5127 } 5128 5129 /*@ 5130 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5131 5132 Not Collective 5133 5134 Input Parameter: 5135 . A - the matrix 5136 5137 Output Parameter: 5138 . A_loc - the local sequential matrix generated 5139 5140 Level: developer 5141 5142 Notes: 5143 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5144 with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and 5145 `n` is the global column count obtained with `MatGetSize()`. 5146 5147 In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
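   A minimal usage sketch (assuming `A` is an already assembled `MATAIJ` matrix):
.vb
     Mat Aloc;
     PetscCall(MatAIJGetLocalMat(A, &Aloc));
     /* ... use Aloc as an ordinary sequential matrix ... */
     PetscCall(MatDestroy(&Aloc));
.ve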
5148 5149 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5150 5151 Destroy the matrix with `MatDestroy()`. 5152 5153 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5154 @*/ 5155 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5156 { 5157 PetscBool mpi; 5158 5159 PetscFunctionBegin; 5160 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5161 if (mpi) { 5162 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5163 } else { 5164 *A_loc = A; 5165 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5166 } 5167 PetscFunctionReturn(PETSC_SUCCESS); 5168 } 5169 5170 /*@ 5171 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5172 5173 Not Collective 5174 5175 Input Parameters: 5176 + A - the matrix 5177 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5178 5179 Output Parameter: 5180 . A_loc - the local sequential matrix generated 5181 5182 Level: developer 5183 5184 Notes: 5185 The matrix is created by taking all of `A`'s local rows and putting them into a sequential 5186 matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with 5187 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5188 5189 In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix. 5190 5191 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5192 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5193 then `MatCopy(Adiag, *A_loc, SAME_NONZERO_PATTERN)` is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5194 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
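   A minimal usage sketch (assuming `A` is an already assembled `MATMPIAIJ` matrix whose numerical values change between uses):
.vb
     Mat Aloc;
     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
     /* ... use Aloc ... */
     /* after the numerical values of A change, refresh Aloc in place */
     PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &Aloc));
     PetscCall(MatDestroy(&Aloc));
.ve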
5195 5196 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5197 @*/ 5198 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5199 { 5200 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5201 Mat_SeqAIJ *mat, *a, *b; 5202 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5203 const PetscScalar *aa, *ba, *aav, *bav; 5204 PetscScalar *ca, *cam; 5205 PetscMPIInt size; 5206 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5207 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5208 PetscBool match; 5209 5210 PetscFunctionBegin; 5211 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5212 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5213 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5214 if (size == 1) { 5215 if (scall == MAT_INITIAL_MATRIX) { 5216 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5217 *A_loc = mpimat->A; 5218 } else if (scall == MAT_REUSE_MATRIX) { 5219 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5220 } 5221 PetscFunctionReturn(PETSC_SUCCESS); 5222 } 5223 5224 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5225 a = (Mat_SeqAIJ *)mpimat->A->data; 5226 b = (Mat_SeqAIJ *)mpimat->B->data; 5227 ai = a->i; 5228 aj = a->j; 5229 bi = b->i; 5230 bj = b->j; 5231 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5232 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5233 aa = aav; 5234 ba = bav; 5235 if (scall == MAT_INITIAL_MATRIX) { 5236 PetscCall(PetscMalloc1(1 + am, &ci)); 5237 ci[0] = 0; 5238 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5239 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5240 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5241 k = 0; 5242 for (i = 0; i < am; i++) { 5243 ncols_o = bi[i + 1] - bi[i]; 5244 ncols_d = ai[i + 1] - ai[i]; 5245 /* off-diagonal portion of A */ 5246 for (jo = 0; jo < ncols_o; jo++) { 5247 col = cmap[*bj]; 5248 if (col >= cstart) break; 5249 cj[k] = col; 5250 bj++; 5251 ca[k++] = *ba++; 5252 } 5253 /* diagonal portion of A */ 5254 for (j = 0; j < ncols_d; j++) { 5255 cj[k] = cstart + *aj++; 5256 ca[k++] = *aa++; 5257 } 5258 /* off-diagonal portion of A */ 5259 for (j = jo; j < ncols_o; j++) { 5260 cj[k] = cmap[*bj++]; 5261 ca[k++] = *ba++; 5262 } 5263 } 5264 /* put together the new matrix */ 5265 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5266 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5267 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5268 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5269 mat->free_a = PETSC_TRUE; 5270 mat->free_ij = PETSC_TRUE; 5271 mat->nonew = 0; 5272 } else if (scall == MAT_REUSE_MATRIX) { 5273 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5274 ci = mat->i; 5275 cj = mat->j; 5276 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5277 for (i = 0; i < am; i++) { 5278 /* off-diagonal portion of A */ 5279 ncols_o = bi[i + 1] - bi[i]; 5280 for (jo = 0; jo < ncols_o; jo++) { 5281 col = cmap[*bj]; 5282 if (col >= cstart) break; 5283 *cam++ = *ba++; 5284 bj++; 5285 } 5286 /* diagonal portion of A */ 5287 ncols_d = ai[i + 1] - ai[i]; 5288 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5289 /* off-diagonal portion of A */ 5290 for (j = jo; j < ncols_o; j++) { 5291 *cam++ = *ba++; 5292 bj++; 5293 } 5294 } 5295 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5296 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5297 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5298 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5299 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5300 PetscFunctionReturn(PETSC_SUCCESS); 5301 } 5302 5303 /*@ 5304 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5305 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5306 5307 Not Collective 5308 5309 Input Parameters: 5310 + A - the matrix 5311 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5312 5313 Output Parameters: 5314 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5315 - A_loc - the local sequential matrix generated 5316 5317 Level: developer 5318 5319 Note: 5320 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5321 part, then those associated with the off-diagonal part (in its local ordering) 5322 5323 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5324 @*/ 5325 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5326 { 5327 Mat Ao, Ad; 5328 const PetscInt *cmap; 5329 PetscMPIInt size; 5330 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5331 5332 PetscFunctionBegin; 5333 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5334 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5335 if (size == 1) { 5336 if (scall == MAT_INITIAL_MATRIX) { 5337 PetscCall(PetscObjectReference((PetscObject)Ad)); 5338 *A_loc = Ad; 5339 } else if (scall == MAT_REUSE_MATRIX) { 5340 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5341 } 5342 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5343 PetscFunctionReturn(PETSC_SUCCESS); 5344 } 5345 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5346 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5347 if (f) { 5348 PetscCall((*f)(A, scall, glob, A_loc)); 5349 } else { 5350 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5351 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5352 Mat_SeqAIJ *c; 5353 PetscInt *ai = a->i, *aj = a->j; 5354 PetscInt *bi = b->i, *bj = b->j; 5355 PetscInt *ci, *cj; 5356 const PetscScalar *aa, *ba; 5357 PetscScalar *ca; 5358 PetscInt i, j, am, dn, on; 5359 5360 
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5361 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5362 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5363 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5364 if (scall == MAT_INITIAL_MATRIX) { 5365 PetscInt k; 5366 PetscCall(PetscMalloc1(1 + am, &ci)); 5367 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5368 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5369 ci[0] = 0; 5370 for (i = 0, k = 0; i < am; i++) { 5371 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5372 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5373 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5374 /* diagonal portion of A */ 5375 for (j = 0; j < ncols_d; j++, k++) { 5376 cj[k] = *aj++; 5377 ca[k] = *aa++; 5378 } 5379 /* off-diagonal portion of A */ 5380 for (j = 0; j < ncols_o; j++, k++) { 5381 cj[k] = dn + *bj++; 5382 ca[k] = *ba++; 5383 } 5384 } 5385 /* put together the new matrix */ 5386 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5387 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5388 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5389 c = (Mat_SeqAIJ *)(*A_loc)->data; 5390 c->free_a = PETSC_TRUE; 5391 c->free_ij = PETSC_TRUE; 5392 c->nonew = 0; 5393 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5394 } else if (scall == MAT_REUSE_MATRIX) { 5395 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5396 for (i = 0; i < am; i++) { 5397 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5398 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5399 /* diagonal portion of A */ 5400 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5401 /* off-diagonal portion of A */ 5402 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5403 } 5404 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5405 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5406 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5407 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5408 if (glob) { 5409 PetscInt cst, *gidx; 5410 5411 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5412 PetscCall(PetscMalloc1(dn + on, &gidx)); 5413 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5414 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5415 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5416 } 5417 } 5418 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5419 PetscFunctionReturn(PETSC_SUCCESS); 5420 } 5421 5422 /*@C 5423 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5424 5425 Not Collective 5426 5427 Input Parameters: 5428 + A - the matrix 5429 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5430 . row - index set of rows to extract (or `NULL`) 5431 - col - index set of columns to extract (or `NULL`) 5432 5433 Output Parameter: 5434 . 
A_loc - the local sequential matrix generated 5435 5436 Level: developer 5437 5438 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5439 @*/ 5440 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5441 { 5442 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5443 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5444 IS isrowa, iscola; 5445 Mat *aloc; 5446 PetscBool match; 5447 5448 PetscFunctionBegin; 5449 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5450 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5451 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5452 if (!row) { 5453 start = A->rmap->rstart; 5454 end = A->rmap->rend; 5455 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5456 } else { 5457 isrowa = *row; 5458 } 5459 if (!col) { 5460 start = A->cmap->rstart; 5461 cmap = a->garray; 5462 nzA = a->A->cmap->n; 5463 nzB = a->B->cmap->n; 5464 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5465 ncols = 0; 5466 for (i = 0; i < nzB; i++) { 5467 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5468 else break; 5469 } 5470 imark = i; 5471 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5472 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5473 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5474 } else { 5475 iscola = *col; 5476 } 5477 if (scall != MAT_INITIAL_MATRIX) { 5478 PetscCall(PetscMalloc1(1, &aloc)); 5479 aloc[0] = *A_loc; 5480 } 5481 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5482 if (!col) { /* attach global id of condensed columns */ 5483 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5484 } 5485 *A_loc = aloc[0]; 5486 PetscCall(PetscFree(aloc)); 5487 if (!row) PetscCall(ISDestroy(&isrowa)); 5488 if (!col) PetscCall(ISDestroy(&iscola)); 5489 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5490 PetscFunctionReturn(PETSC_SUCCESS); 5491 } 5492 5493 /* 5494 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5495 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5496 * on a global size. 
5497 * */ 5498 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5499 { 5500 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5501 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5502 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5503 PetscMPIInt owner; 5504 PetscSFNode *iremote, *oiremote; 5505 const PetscInt *lrowindices; 5506 PetscSF sf, osf; 5507 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5508 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5509 MPI_Comm comm; 5510 ISLocalToGlobalMapping mapping; 5511 const PetscScalar *pd_a, *po_a; 5512 5513 PetscFunctionBegin; 5514 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5515 /* plocalsize is the number of roots 5516 * nrows is the number of leaves 5517 * */ 5518 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5519 PetscCall(ISGetLocalSize(rows, &nrows)); 5520 PetscCall(PetscCalloc1(nrows, &iremote)); 5521 PetscCall(ISGetIndices(rows, &lrowindices)); 5522 for (i = 0; i < nrows; i++) { 5523 /* Find a remote index and an owner for a row 5524 * The row could be local or remote 5525 * */ 5526 owner = 0; 5527 lidx = 0; 5528 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5529 iremote[i].index = lidx; 5530 iremote[i].rank = owner; 5531 } 5532 /* Create SF to communicate how many nonzero columns for each row */ 5533 PetscCall(PetscSFCreate(comm, &sf)); 5534 /* SF will figure out the number of nonzero columns for each row, and their 5535 * offsets 5536 * */ 5537 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5538 PetscCall(PetscSFSetFromOptions(sf)); 5539 PetscCall(PetscSFSetUp(sf)); 5540 5541 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5542 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5543 PetscCall(PetscCalloc1(nrows, &pnnz)); 5544 roffsets[0] = 0; 5545 roffsets[1] = 0; 5546 for (i = 0; i < plocalsize; i++) { 5547 /* diagonal */ 5548 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5549 /* off-diagonal */ 5550 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5551 /* compute offsets so that we relative location for each row */ 5552 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5553 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5554 } 5555 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5556 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5557 /* 'r' means root, and 'l' means leaf */ 5558 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5559 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5560 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5561 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5562 PetscCall(PetscSFDestroy(&sf)); 5563 PetscCall(PetscFree(roffsets)); 5564 PetscCall(PetscFree(nrcols)); 5565 dntotalcols = 0; 5566 ontotalcols = 0; 5567 ncol = 0; 5568 for (i = 0; i < nrows; i++) { 5569 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5570 ncol = PetscMax(pnnz[i], ncol); 5571 /* diagonal */ 5572 dntotalcols += nlcols[i * 2 + 0]; 5573 /* off-diagonal */ 5574 ontotalcols += nlcols[i * 2 + 1]; 5575 } 5576 /* We do not need to figure the right number of columns 5577 * since all the calculations will be done by going through the raw data 5578 * */ 5579 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5580 PetscCall(MatSetUp(*P_oth)); 5581 
PetscCall(PetscFree(pnnz)); 5582 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5583 /* diagonal */ 5584 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5585 /* off-diagonal */ 5586 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5587 /* diagonal */ 5588 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5589 /* off-diagonal */ 5590 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5591 dntotalcols = 0; 5592 ontotalcols = 0; 5593 ntotalcols = 0; 5594 for (i = 0; i < nrows; i++) { 5595 owner = 0; 5596 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5597 /* Set iremote for diag matrix */ 5598 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5599 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5600 iremote[dntotalcols].rank = owner; 5601 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5602 ilocal[dntotalcols++] = ntotalcols++; 5603 } 5604 /* off-diagonal */ 5605 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5606 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5607 oiremote[ontotalcols].rank = owner; 5608 oilocal[ontotalcols++] = ntotalcols++; 5609 } 5610 } 5611 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5612 PetscCall(PetscFree(loffsets)); 5613 PetscCall(PetscFree(nlcols)); 5614 PetscCall(PetscSFCreate(comm, &sf)); 5615 /* P serves as roots and P_oth is leaves 5616 * Diag matrix 5617 * */ 5618 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5619 PetscCall(PetscSFSetFromOptions(sf)); 5620 PetscCall(PetscSFSetUp(sf)); 5621 5622 PetscCall(PetscSFCreate(comm, &osf)); 5623 /* off-diagonal */ 5624 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5625 PetscCall(PetscSFSetFromOptions(osf)); 5626 PetscCall(PetscSFSetUp(osf)); 5627 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5628 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5629 /* operate on the matrix internal data to save memory */ 5630 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5631 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5632 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5633 /* Convert to global indices for diag matrix */ 5634 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5635 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5636 /* We want P_oth store global indices */ 5637 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5638 /* Use memory scalable approach */ 5639 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5640 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5641 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5642 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5643 /* Convert back to local indices */ 5644 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5645 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5646 nout = 0; 5647 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5648 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5649 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5650 /* Exchange values */ 5651 PetscCall(PetscSFBcastEnd(sf, 
MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5652 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5653 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5654 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5655 /* Stop PETSc from shrinking memory */ 5656 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5657 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5658 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5659 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5660 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5661 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5662 PetscCall(PetscSFDestroy(&sf)); 5663 PetscCall(PetscSFDestroy(&osf)); 5664 PetscFunctionReturn(PETSC_SUCCESS); 5665 } 5666 5667 /* 5668 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5669 * This supports MPIAIJ and MAIJ 5670 * */ 5671 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5672 { 5673 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5674 Mat_SeqAIJ *p_oth; 5675 IS rows, map; 5676 PetscHMapI hamp; 5677 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5678 MPI_Comm comm; 5679 PetscSF sf, osf; 5680 PetscBool has; 5681 5682 PetscFunctionBegin; 5683 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5684 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5685 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5686 * and then create a submatrix (that often is an overlapping matrix) 5687 * */ 5688 if (reuse == MAT_INITIAL_MATRIX) { 5689 /* Use a hash table to figure out unique keys */ 5690 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5691 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5692 count = 0; 5693 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5694 for (i = 0; i < a->B->cmap->n; i++) { 5695 key = a->garray[i] / dof; 5696 PetscCall(PetscHMapIHas(hamp, key, &has)); 5697 if (!has) { 5698 mapping[i] = count; 5699 PetscCall(PetscHMapISet(hamp, key, count++)); 5700 } else { 5701 /* Current 'i' has the same value the previous step */ 5702 mapping[i] = count - 1; 5703 } 5704 } 5705 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5706 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5707 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5708 PetscCall(PetscCalloc1(htsize, &rowindices)); 5709 off = 0; 5710 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5711 PetscCall(PetscHMapIDestroy(&hamp)); 5712 PetscCall(PetscSortInt(htsize, rowindices)); 5713 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5714 /* In case, the matrix was already created but users want to recreate the matrix */ 5715 PetscCall(MatDestroy(P_oth)); 5716 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5717 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5718 PetscCall(ISDestroy(&map)); 5719 PetscCall(ISDestroy(&rows)); 5720 } else if (reuse == MAT_REUSE_MATRIX) { 5721 /* If matrix was already created, we simply update values using SF objects 5722 * that as attached to the matrix earlier. 
5723 */ 5724 const PetscScalar *pd_a, *po_a; 5725 5726 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5727 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5728 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5729 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5730 /* Update values in place */ 5731 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5732 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5733 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5734 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5735 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5736 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5737 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5738 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5739 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5740 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5741 PetscFunctionReturn(PETSC_SUCCESS); 5742 } 5743 5744 /*@C 5745 MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that correspond to the nonzero columns of local `A` 5746 5747 Collective 5748 5749 Input Parameters: 5750 + A - the first matrix in `MATMPIAIJ` format 5751 . B - the second matrix in `MATMPIAIJ` format 5752 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5753 5754 Output Parameters: 5755 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5756 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5757 - B_seq - the sequential matrix generated 5758 5759 Level: developer 5760 5761 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5762 @*/ 5763 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5764 { 5765 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5766 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5767 IS isrowb, iscolb; 5768 Mat *bseq = NULL; 5769 5770 PetscFunctionBegin; 5771 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5772 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5773 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5774 5775 if (scall == MAT_INITIAL_MATRIX) { 5776 start = A->cmap->rstart; 5777 cmap = a->garray; 5778 nzA = a->A->cmap->n; 5779 nzB = a->B->cmap->n; 5780 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5781 ncols = 0; 5782 for (i = 0; i < nzB; i++) { /* global rows below the local row range */ 5783 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5784 else break; 5785 } 5786 imark = i; 5787 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5788 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* global rows above the local row range */ 5789 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5790 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5791 } else { 5792 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5793 isrowb = *rowb; 5794 iscolb = *colb; 5795 PetscCall(PetscMalloc1(1, &bseq)); 5796 bseq[0] = *B_seq; 5797 } 5798 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5799 *B_seq = bseq[0]; 5800 PetscCall(PetscFree(bseq)); 
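/* Hand the row and column index sets back to the caller if they were requested; otherwise destroy them */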
5801 if (!rowb) { 5802 PetscCall(ISDestroy(&isrowb)); 5803 } else { 5804 *rowb = isrowb; 5805 } 5806 if (!colb) { 5807 PetscCall(ISDestroy(&iscolb)); 5808 } else { 5809 *colb = iscolb; 5810 } 5811 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5812 PetscFunctionReturn(PETSC_SUCCESS); 5813 } 5814 5815 /* 5816 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns 5817 of the OFF-DIAGONAL portion of local A 5818 5819 Collective 5820 5821 Input Parameters: 5822 + A,B - the matrices in `MATMPIAIJ` format 5823 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5824 5825 Output Parameters: 5826 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5827 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5828 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5829 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5830 5831 Developer Note: 5832 This directly accesses information inside the VecScatter associated with the matrix-vector product 5833 for this matrix. This is not desirable. 5834 5835 Level: developer 5836 5837 */ 5838 5839 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5840 { 5841 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5842 VecScatter ctx; 5843 MPI_Comm comm; 5844 const PetscMPIInt *rprocs, *sprocs; 5845 PetscMPIInt nrecvs, nsends; 5846 const PetscInt *srow, *rstarts, *sstarts; 5847 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5848 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5849 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5850 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5851 PetscMPIInt size, tag, rank, nreqs; 5852 5853 PetscFunctionBegin; 5854 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5855 PetscCallMPI(MPI_Comm_size(comm, &size)); 5856 5857 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5858 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5859 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5860 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5861 5862 if (size == 1) { 5863 startsj_s = NULL; 5864 bufa_ptr = NULL; 5865 *B_oth = NULL; 5866 PetscFunctionReturn(PETSC_SUCCESS); 5867 } 5868 5869 ctx = a->Mvctx; 5870 tag = ((PetscObject)ctx)->tag; 5871 5872 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5873 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5874 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5875 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5876 PetscCall(PetscMalloc1(nreqs, &reqs)); 5877 rwaits = reqs; 5878 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5879 5880 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5881 if (scall == MAT_INITIAL_MATRIX) { 5882 /* i-array */ 5883 /* post receives */ 5884 if (nrecvs) PetscCall(PetscMalloc1(rbs * 
(rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5885 for (i = 0; i < nrecvs; i++) { 5886 rowlen = rvalues + rstarts[i] * rbs; 5887 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5888 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5889 } 5890 5891 /* pack the outgoing message */ 5892 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5893 5894 sstartsj[0] = 0; 5895 rstartsj[0] = 0; 5896 len = 0; /* total length of j or a array to be sent */ 5897 if (nsends) { 5898 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5899 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5900 } 5901 for (i = 0; i < nsends; i++) { 5902 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5903 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5904 for (j = 0; j < nrows; j++) { 5905 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5906 for (l = 0; l < sbs; l++) { 5907 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5908 5909 rowlen[j * sbs + l] = ncols; 5910 5911 len += ncols; 5912 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5913 } 5914 k++; 5915 } 5916 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5917 5918 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5919 } 5920 /* recvs and sends of i-array are completed */ 5921 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5922 PetscCall(PetscFree(svalues)); 5923 5924 /* allocate buffers for sending j and a arrays */ 5925 PetscCall(PetscMalloc1(len + 1, &bufj)); 5926 PetscCall(PetscMalloc1(len + 1, &bufa)); 5927 5928 /* create i-array of B_oth */ 5929 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5930 5931 b_othi[0] = 0; 5932 len = 0; /* total length of j or a array to be received */ 5933 k = 0; 5934 for (i = 0; i < nrecvs; i++) { 5935 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5936 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5937 for (j = 0; j < nrows; j++) { 5938 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5939 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5940 k++; 5941 } 5942 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5943 } 5944 PetscCall(PetscFree(rvalues)); 5945 5946 /* allocate space for j and a arrays of B_oth */ 5947 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5948 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5949 5950 /* j-array */ 5951 /* post receives of j-array */ 5952 for (i = 0; i < nrecvs; i++) { 5953 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5954 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5955 } 5956 5957 /* pack the outgoing message j-array */ 5958 if (nsends) k = sstarts[0]; 5959 for (i = 0; i < nsends; i++) { 5960 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5961 bufJ = bufj + sstartsj[i]; 5962 for (j = 0; j < nrows; j++) { 5963 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5964 for (ll = 0; ll < sbs; ll++) { 5965 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5966 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5967 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5968 } 5969 } 5970 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - 
sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5971 } 5972 5973 /* recvs and sends of j-array are completed */ 5974 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5975 } else if (scall == MAT_REUSE_MATRIX) { 5976 sstartsj = *startsj_s; 5977 rstartsj = *startsj_r; 5978 bufa = *bufa_ptr; 5979 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5980 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5981 5982 /* a-array */ 5983 /* post receives of a-array */ 5984 for (i = 0; i < nrecvs; i++) { 5985 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5986 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5987 } 5988 5989 /* pack the outgoing message a-array */ 5990 if (nsends) k = sstarts[0]; 5991 for (i = 0; i < nsends; i++) { 5992 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5993 bufA = bufa + sstartsj[i]; 5994 for (j = 0; j < nrows; j++) { 5995 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5996 for (ll = 0; ll < sbs; ll++) { 5997 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5998 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5999 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6000 } 6001 } 6002 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6003 } 6004 /* recvs and sends of a-array are completed */ 6005 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6006 PetscCall(PetscFree(reqs)); 6007 6008 if (scall == MAT_INITIAL_MATRIX) { 6009 Mat_SeqAIJ *b_oth; 6010 6011 /* put together the new matrix */ 6012 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6013 6014 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6015 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6016 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6017 b_oth->free_a = PETSC_TRUE; 6018 b_oth->free_ij = PETSC_TRUE; 6019 b_oth->nonew = 0; 6020 6021 PetscCall(PetscFree(bufj)); 6022 if (!startsj_s || !bufa_ptr) { 6023 PetscCall(PetscFree2(sstartsj, rstartsj)); 6024 PetscCall(PetscFree(bufa_ptr)); 6025 } else { 6026 *startsj_s = sstartsj; 6027 *startsj_r = rstartsj; 6028 *bufa_ptr = bufa; 6029 } 6030 } else if (scall == MAT_REUSE_MATRIX) { 6031 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6032 } 6033 6034 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6035 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6036 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6037 PetscFunctionReturn(PETSC_SUCCESS); 6038 } 6039 6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6042 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6043 #if defined(PETSC_HAVE_MKL_SPARSE) 6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6045 #endif 6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6048 #if defined(PETSC_HAVE_ELEMENTAL) 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6050 #endif 6051 #if defined(PETSC_HAVE_SCALAPACK) 6052 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6053 #endif 6054 #if defined(PETSC_HAVE_HYPRE) 6055 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6056 #endif 6057 #if defined(PETSC_HAVE_CUDA) 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6059 #endif 6060 #if defined(PETSC_HAVE_HIP) 6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6062 #endif 6063 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6065 #endif 6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6067 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6068 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6069 6070 /* 6071 Computes (B'*A')' since computing B*A directly is untenable 6072 6073 n p p 6074 [ ] [ ] [ ] 6075 m [ A ] * n [ B ] = m [ C ] 6076 [ ] [ ] [ ] 6077 6078 */ 6079 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6080 { 6081 Mat At, Bt, Ct; 6082 6083 PetscFunctionBegin; 6084 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6085 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6086 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6087 PetscCall(MatDestroy(&At)); 6088 PetscCall(MatDestroy(&Bt)); 6089 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6090 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6091 PetscCall(MatDestroy(&Ct)); 6092 PetscFunctionReturn(PETSC_SUCCESS); 6093 } 6094 6095 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6096 { 6097 PetscBool cisdense; 6098 6099 PetscFunctionBegin; 6100 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6101 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6102 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6103 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6104 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6105 PetscCall(MatSetUp(C)); 6106 6107 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6108 PetscFunctionReturn(PETSC_SUCCESS); 6109 } 6110 6111 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6112 { 6113 Mat_Product *product = C->product; 6114 Mat A = product->A, B = product->B; 6115 6116 PetscFunctionBegin; 6117 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6118 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6119 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6120 C->ops->productsymbolic = MatProductSymbolic_AB; 6121 PetscFunctionReturn(PETSC_SUCCESS); 6122 } 6123 6124 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6125 { 6126 Mat_Product *product = C->product; 6127 6128 PetscFunctionBegin; 6129 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6130 PetscFunctionReturn(PETSC_SUCCESS); 6131 } 6132 6133 /* 6134 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6135 6136 Input Parameters: 6137 6138 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6139 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6140 6141 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6142 6143 For Set1, j1[] contains column indices of the nonzeros. 6144 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6145 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6146 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6147 6148 Similar for Set2. 6149 6150 This routine merges the two sets of nonzeros row by row and removes repeats. 6151 6152 Output Parameters: (memory is allocated by the caller) 6153 6154 i[],j[]: the CSR of the merged matrix, which has m rows. 6155 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix. 6156 imap2[]: similar to imap1[], but for Set2. 6157 Note we order nonzeros row-by-row and from left to right. 
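  For illustration only (hypothetical data, a single row): if Set1 has sorted column indices j1[] = {1,1,3}
  (so jmap1[] = {0,2,3}, i.e. unique columns 1 and 3) and Set2 has j2[] = {2,3,3} (so jmap2[] = {0,1,3}),
  then the merged row is j[] = {1,2,3} with i[] = {0,3}, and imap1[] = {0,2}, imap2[] = {1,2}.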
6158 */ 6159 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6160 { 6161 PetscInt r, m; /* Row index of mat */ 6162 PetscCount t, t1, t2, b1, e1, b2, e2; 6163 6164 PetscFunctionBegin; 6165 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6166 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6167 i[0] = 0; 6168 for (r = 0; r < m; r++) { /* Do row by row merging */ 6169 b1 = rowBegin1[r]; 6170 e1 = rowEnd1[r]; 6171 b2 = rowBegin2[r]; 6172 e2 = rowEnd2[r]; 6173 while (b1 < e1 && b2 < e2) { 6174 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6175 j[t] = j1[b1]; 6176 imap1[t1] = t; 6177 imap2[t2] = t; 6178 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6179 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6180 t1++; 6181 t2++; 6182 t++; 6183 } else if (j1[b1] < j2[b2]) { 6184 j[t] = j1[b1]; 6185 imap1[t1] = t; 6186 b1 += jmap1[t1 + 1] - jmap1[t1]; 6187 t1++; 6188 t++; 6189 } else { 6190 j[t] = j2[b2]; 6191 imap2[t2] = t; 6192 b2 += jmap2[t2 + 1] - jmap2[t2]; 6193 t2++; 6194 t++; 6195 } 6196 } 6197 /* Merge the remaining in either j1[] or j2[] */ 6198 while (b1 < e1) { 6199 j[t] = j1[b1]; 6200 imap1[t1] = t; 6201 b1 += jmap1[t1 + 1] - jmap1[t1]; 6202 t1++; 6203 t++; 6204 } 6205 while (b2 < e2) { 6206 j[t] = j2[b2]; 6207 imap2[t2] = t; 6208 b2 += jmap2[t2 + 1] - jmap2[t2]; 6209 t2++; 6210 t++; 6211 } 6212 PetscCall(PetscIntCast(t, i + r + 1)); 6213 } 6214 PetscFunctionReturn(PETSC_SUCCESS); 6215 } 6216 6217 /* 6218 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6219 6220 Input Parameters: 6221 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6222 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6223 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6224 6225 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6226 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6227 6228 Output Parameters: 6229 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6230 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6231 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6232 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6233 6234 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6235 Atot: number of entries belonging to the diagonal block. 6236 Annz: number of unique nonzeros belonging to the diagonal block. 6237 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6238 repeats (i.e., same 'i,j' pair). 6239 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6240 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6241 6242 Atot: number of entries belonging to the diagonal block 6243 Annz: number of unique nonzeros belonging to the diagonal block. 6244 6245 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6246 6247 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6248 */ 6249 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6250 { 6251 PetscInt cstart, cend, rstart, rend, row, col; 6252 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6253 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6254 PetscCount k, m, p, q, r, s, mid; 6255 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6256 6257 PetscFunctionBegin; 6258 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6259 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6260 m = rend - rstart; 6261 6262 /* Skip negative rows */ 6263 for (k = 0; k < n; k++) 6264 if (i[k] >= 0) break; 6265 6266 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6267 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6268 */ 6269 while (k < n) { 6270 row = i[k]; 6271 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6272 for (s = k; s < n; s++) 6273 if (i[s] != row) break; 6274 6275 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6276 for (p = k; p < s; p++) { 6277 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6278 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6279 } 6280 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6281 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6282 rowBegin[row - rstart] = k; 6283 rowMid[row - rstart] = mid; 6284 rowEnd[row - rstart] = s; 6285 6286 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6287 Atot += mid - k; 6288 Btot += s - mid; 6289 6290 /* Count unique nonzeros of this diag row */ 6291 for (p = k; p < mid;) { 6292 col = j[p]; 6293 do { 6294 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6295 p++; 6296 } while (p < mid && j[p] == col); 6297 Annz++; 6298 } 6299 6300 /* Count unique nonzeros of this offdiag row */ 6301 for (p = mid; p < s;) { 6302 col = j[p]; 6303 do { 6304 p++; 6305 } while (p < s && j[p] == col); 6306 Bnnz++; 6307 } 6308 k = s; 6309 } 6310 6311 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6312 PetscCall(PetscMalloc1(Atot, &Aperm)); 6313 PetscCall(PetscMalloc1(Btot, &Bperm)); 6314 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6315 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6316 6317 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6318 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6319 for (r = 0; r < m; r++) { 6320 k = rowBegin[r]; 6321 mid 
= rowMid[r]; 6322 s = rowEnd[r]; 6323 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6324 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6325 Atot += mid - k; 6326 Btot += s - mid; 6327 6328 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6329 for (p = k; p < mid;) { 6330 col = j[p]; 6331 q = p; 6332 do { 6333 p++; 6334 } while (p < mid && j[p] == col); 6335 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6336 Annz++; 6337 } 6338 6339 for (p = mid; p < s;) { 6340 col = j[p]; 6341 q = p; 6342 do { 6343 p++; 6344 } while (p < s && j[p] == col); 6345 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6346 Bnnz++; 6347 } 6348 } 6349 /* Output */ 6350 *Aperm_ = Aperm; 6351 *Annz_ = Annz; 6352 *Atot_ = Atot; 6353 *Ajmap_ = Ajmap; 6354 *Bperm_ = Bperm; 6355 *Bnnz_ = Bnnz; 6356 *Btot_ = Btot; 6357 *Bjmap_ = Bjmap; 6358 PetscFunctionReturn(PETSC_SUCCESS); 6359 } 6360 6361 /* 6362 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6363 6364 Input Parameters: 6365 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6366 nnz: number of unique nonzeros in the merged matrix 6367 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6368 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6369 6370 Output Parameter: (memory is allocated by the caller) 6371 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6372 6373 Example: 6374 nnz1 = 4 6375 nnz = 6 6376 imap = [1,3,4,5] 6377 jmap = [0,3,5,6,7] 6378 then, 6379 jmap_new = [0,0,3,3,5,6,7] 6380 */ 6381 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6382 { 6383 PetscCount k, p; 6384 6385 PetscFunctionBegin; 6386 jmap_new[0] = 0; 6387 p = nnz; /* p loops over jmap_new[] backwards */ 6388 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6389 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6390 } 6391 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6392 PetscFunctionReturn(PETSC_SUCCESS); 6393 } 6394 6395 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6396 { 6397 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6398 6399 PetscFunctionBegin; 6400 PetscCall(PetscSFDestroy(&coo->sf)); 6401 PetscCall(PetscFree(coo->Aperm1)); 6402 PetscCall(PetscFree(coo->Bperm1)); 6403 PetscCall(PetscFree(coo->Ajmap1)); 6404 PetscCall(PetscFree(coo->Bjmap1)); 6405 PetscCall(PetscFree(coo->Aimap2)); 6406 PetscCall(PetscFree(coo->Bimap2)); 6407 PetscCall(PetscFree(coo->Aperm2)); 6408 PetscCall(PetscFree(coo->Bperm2)); 6409 PetscCall(PetscFree(coo->Ajmap2)); 6410 PetscCall(PetscFree(coo->Bjmap2)); 6411 PetscCall(PetscFree(coo->Cperm1)); 6412 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6413 PetscCall(PetscFree(coo)); 6414 PetscFunctionReturn(PETSC_SUCCESS); 6415 } 6416 6417 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6418 { 6419 MPI_Comm comm; 6420 PetscMPIInt rank, size; 6421 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6422 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6423 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6424 PetscContainer container; 6425 MatCOOStruct_MPIAIJ 
*coo; 6426 6427 PetscFunctionBegin; 6428 PetscCall(PetscFree(mpiaij->garray)); 6429 PetscCall(VecDestroy(&mpiaij->lvec)); 6430 #if defined(PETSC_USE_CTABLE) 6431 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6432 #else 6433 PetscCall(PetscFree(mpiaij->colmap)); 6434 #endif 6435 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6436 mat->assembled = PETSC_FALSE; 6437 mat->was_assembled = PETSC_FALSE; 6438 6439 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6440 PetscCallMPI(MPI_Comm_size(comm, &size)); 6441 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6442 PetscCall(PetscLayoutSetUp(mat->rmap)); 6443 PetscCall(PetscLayoutSetUp(mat->cmap)); 6444 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6445 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6446 PetscCall(MatGetLocalSize(mat, &m, &n)); 6447 PetscCall(MatGetSize(mat, &M, &N)); 6448 6449 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6450 /* entries come first, then local rows, then remote rows. */ 6451 PetscCount n1 = coo_n, *perm1; 6452 PetscInt *i1 = coo_i, *j1 = coo_j; 6453 6454 PetscCall(PetscMalloc1(n1, &perm1)); 6455 for (k = 0; k < n1; k++) perm1[k] = k; 6456 6457 /* Manipulate indices so that entries with negative row or col indices will have smallest 6458 row indices, local entries will have greater but negative row indices, and remote entries 6459 will have positive row indices. 6460 */ 6461 for (k = 0; k < n1; k++) { 6462 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6463 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6464 else { 6465 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6466 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6467 } 6468 } 6469 6470 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6471 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6472 6473 /* Advance k to the first entry we need to take care of */ 6474 for (k = 0; k < n1; k++) 6475 if (i1[k] > PETSC_INT_MIN) break; 6476 PetscCount i1start = k; 6477 6478 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6479 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6480 6481 /* Send remote rows to their owner */ 6482 /* Find which rows should be sent to which remote ranks*/ 6483 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6484 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6485 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6486 const PetscInt *ranges; 6487 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */ 6488 6489 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6490 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6491 for (k = rem; k < n1;) { 6492 PetscMPIInt owner; 6493 PetscInt firstRow, lastRow; 6494 6495 /* Locate a row range */ 6496 firstRow = i1[k]; /* first row of this owner */ 6497 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6498 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6499 6500 /* Find the first index 'p' in [k,n) with i1[p] belonging to the next owner */ 6501 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6502 6503 /* All entries in [k,p) belong to this remote owner */ 6504 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6505 PetscMPIInt *sendto2; 6506 PetscInt *nentries2; 6507 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6508 6509 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6510 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6511 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6512 PetscCall(PetscFree2(sendto, nentries)); 6513 sendto = sendto2; 6514 nentries = nentries2; 6515 maxNsend = maxNsend2; 6516 } 6517 sendto[nsend] = owner; 6518 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6519 nsend++; 6520 k = p; 6521 } 6522 6523 /* Build 1st SF to know offsets on remote to send data */ 6524 PetscSF sf1; 6525 PetscInt nroots = 1, nroots2 = 0; 6526 PetscInt nleaves = nsend, nleaves2 = 0; 6527 PetscInt *offsets; 6528 PetscSFNode *iremote; 6529 6530 PetscCall(PetscSFCreate(comm, &sf1)); 6531 PetscCall(PetscMalloc1(nsend, &iremote)); 6532 PetscCall(PetscMalloc1(nsend, &offsets)); 6533 for (k = 0; k < nsend; k++) { 6534 iremote[k].rank = sendto[k]; 6535 iremote[k].index = 0; 6536 nleaves2 += nentries[k]; 6537 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6538 } 6539 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6540 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6541 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */ 6542 PetscCall(PetscSFDestroy(&sf1)); 6543 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6544 6545 /* Build 2nd SF to send remote COOs to their owner */ 6546 PetscSF sf2; 6547 nroots = nroots2; 6548 nleaves = nleaves2; 6549 PetscCall(PetscSFCreate(comm, &sf2)); 6550 PetscCall(PetscSFSetFromOptions(sf2)); 6551 PetscCall(PetscMalloc1(nleaves, &iremote)); 6552 p = 0; 6553 for (k = 0; k < nsend; k++) { 6554 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6555 for (q = 0; q < nentries[k]; q++, p++) { 6556 iremote[p].rank = sendto[k]; 6557 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6558 } 6559 } 6560 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6561 6562 /* Send the remote COOs to their owner */ 6563 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6564 
PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6565 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6566 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6567 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6568 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6569 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6570 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6571 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6572 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6573 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6574 6575 PetscCall(PetscFree(offsets)); 6576 PetscCall(PetscFree2(sendto, nentries)); 6577 6578 /* Sort received COOs by row along with the permutation array */ 6579 for (k = 0; k < n2; k++) perm2[k] = k; 6580 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6581 6582 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6583 PetscCount *Cperm1; 6584 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6585 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6586 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6587 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6588 6589 /* Support for HYPRE matrices, kind of a hack. 6590 Swap min column with diagonal so that diagonal values will go first */ 6591 PetscBool hypre; 6592 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6593 if (hypre) { 6594 PetscInt *minj; 6595 PetscBT hasdiag; 6596 6597 PetscCall(PetscBTCreate(m, &hasdiag)); 6598 PetscCall(PetscMalloc1(m, &minj)); 6599 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6600 for (k = i1start; k < rem; k++) { 6601 if (j1[k] < cstart || j1[k] >= cend) continue; 6602 const PetscInt rindex = i1[k] - rstart; 6603 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6604 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6605 } 6606 for (k = 0; k < n2; k++) { 6607 if (j2[k] < cstart || j2[k] >= cend) continue; 6608 const PetscInt rindex = i2[k] - rstart; 6609 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6610 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6611 } 6612 for (k = i1start; k < rem; k++) { 6613 const PetscInt rindex = i1[k] - rstart; 6614 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6615 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6616 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6617 } 6618 for (k = 0; k < n2; k++) { 6619 const PetscInt rindex = i2[k] - rstart; 6620 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6621 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6622 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6623 } 6624 PetscCall(PetscBTDestroy(&hasdiag)); 6625 PetscCall(PetscFree(minj)); 6626 } 6627 6628 /* Split local COOs and received COOs into diag/offdiag portions */ 6629 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6630 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6631 PetscCount 
Annz1, Bnnz1, Atot1, Btot1; 6632 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6633 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6634 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6635 6636 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6637 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6638 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6639 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6640 6641 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6642 PetscInt *Ai, *Bi; 6643 PetscInt *Aj, *Bj; 6644 6645 PetscCall(PetscMalloc1(m + 1, &Ai)); 6646 PetscCall(PetscMalloc1(m + 1, &Bi)); 6647 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6648 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6649 6650 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6651 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6652 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6653 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6654 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6655 6656 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6657 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6658 6659 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6660 /* expect nonzeros in A/B most likely have local contributing entries */ 6661 PetscInt Annz = Ai[m]; 6662 PetscInt Bnnz = Bi[m]; 6663 PetscCount *Ajmap1_new, *Bjmap1_new; 6664 6665 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6666 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6667 6668 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6669 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6670 6671 PetscCall(PetscFree(Aimap1)); 6672 PetscCall(PetscFree(Ajmap1)); 6673 PetscCall(PetscFree(Bimap1)); 6674 PetscCall(PetscFree(Bjmap1)); 6675 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6676 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6677 PetscCall(PetscFree(perm1)); 6678 PetscCall(PetscFree3(i2, j2, perm2)); 6679 6680 Ajmap1 = Ajmap1_new; 6681 Bjmap1 = Bjmap1_new; 6682 6683 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6684 if (Annz < Annz1 + Annz2) { 6685 PetscInt *Aj_new; 6686 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6687 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6688 PetscCall(PetscFree(Aj)); 6689 Aj = Aj_new; 6690 } 6691 6692 if (Bnnz < Bnnz1 + Bnnz2) { 6693 PetscInt *Bj_new; 6694 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6695 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6696 PetscCall(PetscFree(Bj)); 6697 Bj = Bj_new; 6698 } 6699 6700 /* Create new submatrices for on-process and off-process coupling */ 6701 PetscScalar *Aa, *Ba; 6702 MatType rtype; 6703 Mat_SeqAIJ *a, *b; 6704 PetscObjectState state; 6705 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6706 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6707 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6708 if (cstart) { 6709 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6710 } 6711 6712 PetscCall(MatGetRootType_Private(mat, &rtype)); 6713 6714 
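/* Rebuild the local diagonal (A) and off-diagonal (B) blocks from the merged CSR arrays assembled above; the MatSeqXAIJGetOptions_Private()/MatSeqXAIJRestoreOptions_Private() pairs below are meant to carry the Seq(X)AIJ options of the old blocks over to the newly created ones */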
MatSeqXAIJGetOptions_Private(mpiaij->A); 6715 PetscCall(MatDestroy(&mpiaij->A)); 6716 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6717 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6718 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6719 6720 MatSeqXAIJGetOptions_Private(mpiaij->B); 6721 PetscCall(MatDestroy(&mpiaij->B)); 6722 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6723 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6724 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6725 6726 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6727 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6728 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6729 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6730 6731 a = (Mat_SeqAIJ *)mpiaij->A->data; 6732 b = (Mat_SeqAIJ *)mpiaij->B->data; 6733 a->free_a = PETSC_TRUE; 6734 a->free_ij = PETSC_TRUE; 6735 b->free_a = PETSC_TRUE; 6736 b->free_ij = PETSC_TRUE; 6737 a->maxnz = a->nz; 6738 b->maxnz = b->nz; 6739 6740 /* conversion must happen AFTER multiply setup */ 6741 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6742 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6743 PetscCall(VecDestroy(&mpiaij->lvec)); 6744 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6745 6746 // Put the COO struct in a container and then attach that to the matrix 6747 PetscCall(PetscMalloc1(1, &coo)); 6748 coo->n = coo_n; 6749 coo->sf = sf2; 6750 coo->sendlen = nleaves; 6751 coo->recvlen = nroots; 6752 coo->Annz = Annz; 6753 coo->Bnnz = Bnnz; 6754 coo->Annz2 = Annz2; 6755 coo->Bnnz2 = Bnnz2; 6756 coo->Atot1 = Atot1; 6757 coo->Atot2 = Atot2; 6758 coo->Btot1 = Btot1; 6759 coo->Btot2 = Btot2; 6760 coo->Ajmap1 = Ajmap1; 6761 coo->Aperm1 = Aperm1; 6762 coo->Bjmap1 = Bjmap1; 6763 coo->Bperm1 = Bperm1; 6764 coo->Aimap2 = Aimap2; 6765 coo->Ajmap2 = Ajmap2; 6766 coo->Aperm2 = Aperm2; 6767 coo->Bimap2 = Bimap2; 6768 coo->Bjmap2 = Bjmap2; 6769 coo->Bperm2 = Bperm2; 6770 coo->Cperm1 = Cperm1; 6771 // Allocate in preallocation. 
If not used, it has zero cost on host 6772 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6773 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6774 PetscCall(PetscContainerSetPointer(container, coo)); 6775 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6776 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6777 PetscCall(PetscContainerDestroy(&container)); 6778 PetscFunctionReturn(PETSC_SUCCESS); 6779 } 6780 6781 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6782 { 6783 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6784 Mat A = mpiaij->A, B = mpiaij->B; 6785 PetscScalar *Aa, *Ba; 6786 PetscScalar *sendbuf, *recvbuf; 6787 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6788 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6789 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6790 const PetscCount *Cperm1; 6791 PetscContainer container; 6792 MatCOOStruct_MPIAIJ *coo; 6793 6794 PetscFunctionBegin; 6795 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6796 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6797 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6798 sendbuf = coo->sendbuf; 6799 recvbuf = coo->recvbuf; 6800 Ajmap1 = coo->Ajmap1; 6801 Ajmap2 = coo->Ajmap2; 6802 Aimap2 = coo->Aimap2; 6803 Bjmap1 = coo->Bjmap1; 6804 Bjmap2 = coo->Bjmap2; 6805 Bimap2 = coo->Bimap2; 6806 Aperm1 = coo->Aperm1; 6807 Aperm2 = coo->Aperm2; 6808 Bperm1 = coo->Bperm1; 6809 Bperm2 = coo->Bperm2; 6810 Cperm1 = coo->Cperm1; 6811 6812 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6813 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6814 6815 /* Pack entries to be sent to remote */ 6816 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6817 6818 /* Send remote entries to their owner and overlap the communication with local computation */ 6819 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6820 /* Add local entries to A and B */ 6821 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6822 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6823 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6824 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6825 } 6826 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6827 PetscScalar sum = 0.0; 6828 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6829 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6830 } 6831 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6832 6833 /* Add received remote entries to A and B */ 6834 for (PetscCount i = 0; i < coo->Annz2; i++) { 6835 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6836 } 6837 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6838 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6839 } 6840 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6841 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6842 PetscFunctionReturn(PETSC_SUCCESS); 6843 } 6844 6845 /*MC 6846 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6847 6848 Options Database Keys: 6849 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6850 6851 Level: beginner 6852 6853 Notes: 6854 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6855 in this case the values associated with the rows and columns one passes in are set to zero 6856 in the matrix 6857 6858 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6859 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6860 6861 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6862 M*/ 6863 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6864 { 6865 Mat_MPIAIJ *b; 6866 PetscMPIInt size; 6867 6868 PetscFunctionBegin; 6869 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6870 6871 PetscCall(PetscNew(&b)); 6872 B->data = (void *)b; 6873 B->ops[0] = MatOps_Values; 6874 B->assembled = PETSC_FALSE; 6875 B->insertmode = NOT_SET_VALUES; 6876 b->size = size; 6877 6878 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6879 6880 /* build cache for off array entries formed */ 6881 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6882 6883 b->donotstash = PETSC_FALSE; 6884 b->colmap = NULL; 6885 b->garray = NULL; 6886 b->roworiented = PETSC_TRUE; 6887 6888 /* stuff used for matrix vector multiply */ 6889 b->lvec = NULL; 6890 b->Mvctx = NULL; 6891 6892 /* stuff for MatGetRow() */ 6893 b->rowindices = NULL; 6894 b->rowvalues = NULL; 6895 b->getrowactive = PETSC_FALSE; 6896 6897 /* flexible pointer used in CUSPARSE classes */ 6898 b->spptr = NULL; 6899 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", 
MatConvert_MPIAIJ_MPIAIJPERM)); 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6910 #if defined(PETSC_HAVE_CUDA) 6911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6912 #endif 6913 #if defined(PETSC_HAVE_HIP) 6914 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6915 #endif 6916 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6918 #endif 6919 #if defined(PETSC_HAVE_MKL_SPARSE) 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6921 #endif 6922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6926 #if defined(PETSC_HAVE_ELEMENTAL) 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6928 #endif 6929 #if defined(PETSC_HAVE_SCALAPACK) 6930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6931 #endif 6932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6934 #if defined(PETSC_HAVE_HYPRE) 6935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6937 #endif 6938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6941 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6942 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6943 PetscFunctionReturn(PETSC_SUCCESS); 6944 } 6945 6946 /*@ 6947 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6948 and "off-diagonal" part of the matrix in CSR format. 6949 6950 Collective 6951 6952 Input Parameters: 6953 + comm - MPI communicator 6954 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6955 . n - This value should be the same as the local size used in creating the 6956 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6957 calculated if `N` is given) For square matrices `n` is almost always `m`. 6958 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6959 . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6960 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6961 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6962 . a - matrix values 6963 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6964 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6965 - oa - matrix values 6966 6967 Output Parameter: 6968 . mat - the matrix 6969 6970 Level: advanced 6971 6972 Notes: 6973 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6974 must free the arrays once the matrix has been destroyed and not before. 6975 6976 The `i` and `j` indices are 0 based 6977 6978 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6979 6980 This sets local rows and cannot be used to set off-processor values. 6981 6982 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6983 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6984 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6985 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6986 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6987 communication if it is known that only local entries will be set. 
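   Example usage:
   The following is a minimal sketch for exactly two MPI processes assembling the 2x2 matrix [2 -1; -1 2], with one row and one
   column owned by each process. The sizes and values are illustrative only and error checking is omitted.
.vb
   PetscMPIInt rank;
   PetscInt    i[2]  = {0, 1};  // one entry in the local "diagonal" block
   PetscInt    j[1]  = {0};     // its local column index
   PetscScalar a[1]  = {2.0};
   PetscInt    oi[2] = {0, 1};  // one entry in the local "off-diagonal" block
   PetscInt    oj[1];           // its global column index
   PetscScalar oa[1] = {-1.0};
   Mat         A;

   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
   oj[0] = rank ? 0 : 1; // couple to the column owned by the other process
   MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 1, 1, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &A);
   // ... use A; the arrays above must remain valid until after A is destroyed ...
   MatDestroy(&A);
.ve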
6988 6989 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6990 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6991 @*/ 6992 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6993 { 6994 Mat_MPIAIJ *maij; 6995 6996 PetscFunctionBegin; 6997 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6998 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6999 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 7000 PetscCall(MatCreate(comm, mat)); 7001 PetscCall(MatSetSizes(*mat, m, n, M, N)); 7002 PetscCall(MatSetType(*mat, MATMPIAIJ)); 7003 maij = (Mat_MPIAIJ *)(*mat)->data; 7004 7005 (*mat)->preallocated = PETSC_TRUE; 7006 7007 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 7008 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 7009 7010 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 7011 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7012 7013 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7014 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7015 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7016 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7017 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7018 PetscFunctionReturn(PETSC_SUCCESS); 7019 } 7020 7021 typedef struct { 7022 Mat *mp; /* intermediate products */ 7023 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7024 PetscInt cp; /* number of intermediate products */ 7025 7026 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7027 PetscInt *startsj_s, *startsj_r; 7028 PetscScalar *bufa; 7029 Mat P_oth; 7030 7031 /* may take advantage of merging product->B */ 7032 Mat Bloc; /* B-local by merging diag and off-diag */ 7033 7034 /* cusparse does not have support to split between symbolic and numeric phases. 7035 When api_user is true, we don't need to update the numerical values 7036 of the temporary storage */ 7037 PetscBool reusesym; 7038 7039 /* support for COO values insertion */ 7040 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7041 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7042 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7043 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7044 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7045 PetscMemType mtype; 7046 7047 /* customization */ 7048 PetscBool abmerge; 7049 PetscBool P_oth_bind; 7050 } MatMatMPIAIJBACKEND; 7051 7052 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7053 { 7054 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7055 PetscInt i; 7056 7057 PetscFunctionBegin; 7058 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7059 PetscCall(PetscFree(mmdata->bufa)); 7060 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7061 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7062 PetscCall(MatDestroy(&mmdata->P_oth)); 7063 PetscCall(MatDestroy(&mmdata->Bloc)); 7064 PetscCall(PetscSFDestroy(&mmdata->sf)); 7065 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7066 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7067 PetscCall(PetscFree(mmdata->own[0])); 7068 PetscCall(PetscFree(mmdata->own)); 7069 PetscCall(PetscFree(mmdata->off[0])); 7070 PetscCall(PetscFree(mmdata->off)); 7071 PetscCall(PetscFree(mmdata)); 7072 PetscFunctionReturn(PETSC_SUCCESS); 7073 } 7074 7075 /* Copy selected n entries with indices in idx[] of A to v[]. 7076 If idx is NULL, copy the whole data array of A to v[] 7077 */ 7078 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7079 { 7080 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7081 7082 PetscFunctionBegin; 7083 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7084 if (f) { 7085 PetscCall((*f)(A, n, idx, v)); 7086 } else { 7087 const PetscScalar *vv; 7088 7089 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7090 if (n && idx) { 7091 PetscScalar *w = v; 7092 const PetscInt *oi = idx; 7093 PetscInt j; 7094 7095 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7096 } else { 7097 PetscCall(PetscArraycpy(v, vv, n)); 7098 } 7099 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7100 } 7101 PetscFunctionReturn(PETSC_SUCCESS); 7102 } 7103 7104 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7105 { 7106 MatMatMPIAIJBACKEND *mmdata; 7107 PetscInt i, n_d, n_o; 7108 7109 PetscFunctionBegin; 7110 MatCheckProduct(C, 1); 7111 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7112 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7113 if (!mmdata->reusesym) { /* update temporary matrices */ 7114 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7115 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7116 } 7117 mmdata->reusesym = PETSC_FALSE; 7118 7119 for (i = 0; i < mmdata->cp; i++) { 7120 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7121 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7122 } 7123 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7124 PetscInt noff; 7125 7126 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7127 if (mmdata->mptmp[i]) continue; 7128 if (noff) { 7129 PetscInt nown; 7130 7131 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7132 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], 
mmdata->coo_w + n_o)); 7133 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7134 n_o += noff; 7135 n_d += nown; 7136 } else { 7137 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7138 7139 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7140 n_d += mm->nz; 7141 } 7142 } 7143 if (mmdata->hasoffproc) { /* offprocess insertion */ 7144 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7145 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7146 } 7147 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7148 PetscFunctionReturn(PETSC_SUCCESS); 7149 } 7150 7151 /* Support for Pt * A, A * P, or Pt * A * P */ 7152 #define MAX_NUMBER_INTERMEDIATE 4 7153 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7154 { 7155 Mat_Product *product = C->product; 7156 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7157 Mat_MPIAIJ *a, *p; 7158 MatMatMPIAIJBACKEND *mmdata; 7159 ISLocalToGlobalMapping P_oth_l2g = NULL; 7160 IS glob = NULL; 7161 const char *prefix; 7162 char pprefix[256]; 7163 const PetscInt *globidx, *P_oth_idx; 7164 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7165 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7166 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7167 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7168 /* a base offset; type-2: sparse with a local to global map table */ 7169 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7170 7171 MatProductType ptype; 7172 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7173 PetscMPIInt size; 7174 7175 PetscFunctionBegin; 7176 MatCheckProduct(C, 1); 7177 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7178 ptype = product->type; 7179 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7180 ptype = MATPRODUCT_AB; 7181 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7182 } 7183 switch (ptype) { 7184 case MATPRODUCT_AB: 7185 A = product->A; 7186 P = product->B; 7187 m = A->rmap->n; 7188 n = P->cmap->n; 7189 M = A->rmap->N; 7190 N = P->cmap->N; 7191 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7192 break; 7193 case MATPRODUCT_AtB: 7194 P = product->A; 7195 A = product->B; 7196 m = P->cmap->n; 7197 n = A->cmap->n; 7198 M = P->cmap->N; 7199 N = A->cmap->N; 7200 hasoffproc = PETSC_TRUE; 7201 break; 7202 case MATPRODUCT_PtAP: 7203 A = product->A; 7204 P = product->B; 7205 m = P->cmap->n; 7206 n = P->cmap->n; 7207 M = P->cmap->N; 7208 N = P->cmap->N; 7209 hasoffproc = PETSC_TRUE; 7210 break; 7211 default: 7212 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7213 } 7214 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7215 if (size == 1) hasoffproc = PETSC_FALSE; 7216 7217 /* defaults */ 7218 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7219 mp[i] = NULL; 7220 mptmp[i] = PETSC_FALSE; 7221 rmapt[i] = -1; 7222 cmapt[i] = -1; 7223 rmapa[i] = NULL; 7224 cmapa[i] = NULL; 7225 } 7226 7227 /* customization */ 7228 PetscCall(PetscNew(&mmdata)); 7229 mmdata->reusesym = product->api_user; 7230 if (ptype == 
MATPRODUCT_AB) { 7231 if (product->api_user) { 7232 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7233 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7234 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7235 PetscOptionsEnd(); 7236 } else { 7237 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7238 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7239 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7240 PetscOptionsEnd(); 7241 } 7242 } else if (ptype == MATPRODUCT_PtAP) { 7243 if (product->api_user) { 7244 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7245 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7246 PetscOptionsEnd(); 7247 } else { 7248 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7249 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7250 PetscOptionsEnd(); 7251 } 7252 } 7253 a = (Mat_MPIAIJ *)A->data; 7254 p = (Mat_MPIAIJ *)P->data; 7255 PetscCall(MatSetSizes(C, m, n, M, N)); 7256 PetscCall(PetscLayoutSetUp(C->rmap)); 7257 PetscCall(PetscLayoutSetUp(C->cmap)); 7258 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7259 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7260 7261 cp = 0; 7262 switch (ptype) { 7263 case MATPRODUCT_AB: /* A * P */ 7264 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7265 7266 /* A_diag * P_local (merged or not) */ 7267 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7268 /* P is product->B */ 7269 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7270 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7271 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7272 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7273 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7274 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7275 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7276 mp[cp]->product->api_user = product->api_user; 7277 PetscCall(MatProductSetFromOptions(mp[cp])); 7278 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7279 PetscCall(ISGetIndices(glob, &globidx)); 7280 rmapt[cp] = 1; 7281 cmapt[cp] = 2; 7282 cmapa[cp] = globidx; 7283 mptmp[cp] = PETSC_FALSE; 7284 cp++; 7285 } else { /* A_diag * P_diag and A_diag * P_off */ 7286 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7287 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7288 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7289 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7290 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7291 PetscCall(MatAppendOptionsPrefix(mp[cp], 
pprefix)); 7292 mp[cp]->product->api_user = product->api_user; 7293 PetscCall(MatProductSetFromOptions(mp[cp])); 7294 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7295 rmapt[cp] = 1; 7296 cmapt[cp] = 1; 7297 mptmp[cp] = PETSC_FALSE; 7298 cp++; 7299 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7300 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7301 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7302 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7303 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7304 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7305 mp[cp]->product->api_user = product->api_user; 7306 PetscCall(MatProductSetFromOptions(mp[cp])); 7307 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7308 rmapt[cp] = 1; 7309 cmapt[cp] = 2; 7310 cmapa[cp] = p->garray; 7311 mptmp[cp] = PETSC_FALSE; 7312 cp++; 7313 } 7314 7315 /* A_off * P_other */ 7316 if (mmdata->P_oth) { 7317 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7318 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7319 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7320 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7321 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7322 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7323 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7324 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7325 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7326 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7327 mp[cp]->product->api_user = product->api_user; 7328 PetscCall(MatProductSetFromOptions(mp[cp])); 7329 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7330 rmapt[cp] = 1; 7331 cmapt[cp] = 2; 7332 cmapa[cp] = P_oth_idx; 7333 mptmp[cp] = PETSC_FALSE; 7334 cp++; 7335 } 7336 break; 7337 7338 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7339 /* A is product->B */ 7340 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7341 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7342 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7343 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7344 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7345 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7346 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7347 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7348 mp[cp]->product->api_user = product->api_user; 7349 PetscCall(MatProductSetFromOptions(mp[cp])); 7350 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7351 PetscCall(ISGetIndices(glob, &globidx)); 7352 rmapt[cp] = 2; 7353 rmapa[cp] = globidx; 7354 cmapt[cp] = 2; 7355 cmapa[cp] = globidx; 7356 mptmp[cp] = PETSC_FALSE; 7357 cp++; 7358 } else { 7359 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7360 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7361 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7362 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7363 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7364 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7365 mp[cp]->product->api_user = product->api_user; 7366 PetscCall(MatProductSetFromOptions(mp[cp])); 7367 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 
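/* for this first product, (P_diag)^T * Bloc, the rows are C's locally owned rows (type-1 map, consecutive with C's row start as offset),
   while the columns are the merged local columns of A and therefore need the local-to-global table globidx obtained below (type-2 map) */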
7368 PetscCall(ISGetIndices(glob, &globidx)); 7369 rmapt[cp] = 1; 7370 cmapt[cp] = 2; 7371 cmapa[cp] = globidx; 7372 mptmp[cp] = PETSC_FALSE; 7373 cp++; 7374 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7375 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7376 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7377 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7378 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7379 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7380 mp[cp]->product->api_user = product->api_user; 7381 PetscCall(MatProductSetFromOptions(mp[cp])); 7382 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7383 rmapt[cp] = 2; 7384 rmapa[cp] = p->garray; 7385 cmapt[cp] = 2; 7386 cmapa[cp] = globidx; 7387 mptmp[cp] = PETSC_FALSE; 7388 cp++; 7389 } 7390 break; 7391 case MATPRODUCT_PtAP: 7392 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7393 /* P is product->B */ 7394 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7395 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7396 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7397 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7398 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7399 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7400 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7401 mp[cp]->product->api_user = product->api_user; 7402 PetscCall(MatProductSetFromOptions(mp[cp])); 7403 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7404 PetscCall(ISGetIndices(glob, &globidx)); 7405 rmapt[cp] = 2; 7406 rmapa[cp] = globidx; 7407 cmapt[cp] = 2; 7408 cmapa[cp] = globidx; 7409 mptmp[cp] = PETSC_FALSE; 7410 cp++; 7411 if (mmdata->P_oth) { 7412 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7413 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7414 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7415 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7416 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7417 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7418 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7419 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7420 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7421 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7422 mp[cp]->product->api_user = product->api_user; 7423 PetscCall(MatProductSetFromOptions(mp[cp])); 7424 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7425 mptmp[cp] = PETSC_TRUE; 7426 cp++; 7427 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7428 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7429 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7430 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7431 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7432 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7433 mp[cp]->product->api_user = product->api_user; 7434 PetscCall(MatProductSetFromOptions(mp[cp])); 7435 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7436 rmapt[cp] = 2; 7437 rmapa[cp] = globidx; 7438 cmapt[cp] = 2; 7439 cmapa[cp] = P_oth_idx; 7440 mptmp[cp] = PETSC_FALSE; 7441 cp++; 7442 } 7443 break; 7444 default: 7445 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, 
"Not for product type %s", MatProductTypes[ptype]); 7446 } 7447 /* sanity check */ 7448 if (size > 1) 7449 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7450 7451 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7452 for (i = 0; i < cp; i++) { 7453 mmdata->mp[i] = mp[i]; 7454 mmdata->mptmp[i] = mptmp[i]; 7455 } 7456 mmdata->cp = cp; 7457 C->product->data = mmdata; 7458 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7459 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7460 7461 /* memory type */ 7462 mmdata->mtype = PETSC_MEMTYPE_HOST; 7463 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7464 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7465 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7466 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7467 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7468 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7469 7470 /* prepare coo coordinates for values insertion */ 7471 7472 /* count total nonzeros of those intermediate seqaij Mats 7473 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7474 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7475 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7476 */ 7477 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7478 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7479 if (mptmp[cp]) continue; 7480 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7481 const PetscInt *rmap = rmapa[cp]; 7482 const PetscInt mr = mp[cp]->rmap->n; 7483 const PetscInt rs = C->rmap->rstart; 7484 const PetscInt re = C->rmap->rend; 7485 const PetscInt *ii = mm->i; 7486 for (i = 0; i < mr; i++) { 7487 const PetscInt gr = rmap[i]; 7488 const PetscInt nz = ii[i + 1] - ii[i]; 7489 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7490 else ncoo_oown += nz; /* this row is local */ 7491 } 7492 } else ncoo_d += mm->nz; 7493 } 7494 7495 /* 7496 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7497 7498 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7499 7500 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7501 7502 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7503 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7504 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7505 7506 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7507 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 
7508 */ 7509 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7510 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7511 7512 /* gather (i,j) of nonzeros inserted by remote procs */ 7513 if (hasoffproc) { 7514 PetscSF msf; 7515 PetscInt ncoo2, *coo_i2, *coo_j2; 7516 7517 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7518 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7519 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7520 7521 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7522 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7523 PetscInt *idxoff = mmdata->off[cp]; 7524 PetscInt *idxown = mmdata->own[cp]; 7525 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7526 const PetscInt *rmap = rmapa[cp]; 7527 const PetscInt *cmap = cmapa[cp]; 7528 const PetscInt *ii = mm->i; 7529 PetscInt *coi = coo_i + ncoo_o; 7530 PetscInt *coj = coo_j + ncoo_o; 7531 const PetscInt mr = mp[cp]->rmap->n; 7532 const PetscInt rs = C->rmap->rstart; 7533 const PetscInt re = C->rmap->rend; 7534 const PetscInt cs = C->cmap->rstart; 7535 for (i = 0; i < mr; i++) { 7536 const PetscInt *jj = mm->j + ii[i]; 7537 const PetscInt gr = rmap[i]; 7538 const PetscInt nz = ii[i + 1] - ii[i]; 7539 if (gr < rs || gr >= re) { /* this is an offproc row */ 7540 for (j = ii[i]; j < ii[i + 1]; j++) { 7541 *coi++ = gr; 7542 *idxoff++ = j; 7543 } 7544 if (!cmapt[cp]) { /* already global */ 7545 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7546 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7547 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7548 } else { /* offdiag */ 7549 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7550 } 7551 ncoo_o += nz; 7552 } else { /* this is a local row */ 7553 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7554 } 7555 } 7556 } 7557 mmdata->off[cp + 1] = idxoff; 7558 mmdata->own[cp + 1] = idxown; 7559 } 7560 7561 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7562 PetscInt incoo_o; 7563 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7564 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7565 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7566 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7567 ncoo = ncoo_d + ncoo_oown + ncoo2; 7568 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7569 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7570 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7571 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7572 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7573 PetscCall(PetscFree2(coo_i, coo_j)); 7574 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7575 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7576 coo_i = coo_i2; 7577 coo_j = coo_j2; 7578 } else { /* no offproc values insertion */ 7579 ncoo = ncoo_d; 7580 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7581 7582 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7583 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7584 PetscCall(PetscSFSetUp(mmdata->sf)); 7585 } 7586 
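/* record whether any entries must be sent to other processes; the numeric phase checks this before gathering remote values through mmdata->sf */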
mmdata->hasoffproc = hasoffproc; 7587 7588 /* gather (i,j) of nonzeros inserted locally */ 7589 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7590 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7591 PetscInt *coi = coo_i + ncoo_d; 7592 PetscInt *coj = coo_j + ncoo_d; 7593 const PetscInt *jj = mm->j; 7594 const PetscInt *ii = mm->i; 7595 const PetscInt *cmap = cmapa[cp]; 7596 const PetscInt *rmap = rmapa[cp]; 7597 const PetscInt mr = mp[cp]->rmap->n; 7598 const PetscInt rs = C->rmap->rstart; 7599 const PetscInt re = C->rmap->rend; 7600 const PetscInt cs = C->cmap->rstart; 7601 7602 if (mptmp[cp]) continue; 7603 if (rmapt[cp] == 1) { /* consecutive rows */ 7604 /* fill coo_i */ 7605 for (i = 0; i < mr; i++) { 7606 const PetscInt gr = i + rs; 7607 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7608 } 7609 /* fill coo_j */ 7610 if (!cmapt[cp]) { /* type-0, already global */ 7611 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7612 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7613 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7614 } else { /* type-2, local to global for sparse columns */ 7615 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7616 } 7617 ncoo_d += mm->nz; 7618 } else if (rmapt[cp] == 2) { /* sparse rows */ 7619 for (i = 0; i < mr; i++) { 7620 const PetscInt *jj = mm->j + ii[i]; 7621 const PetscInt gr = rmap[i]; 7622 const PetscInt nz = ii[i + 1] - ii[i]; 7623 if (gr >= rs && gr < re) { /* local rows */ 7624 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7625 if (!cmapt[cp]) { /* type-0, already global */ 7626 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7627 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7628 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7629 } else { /* type-2, local to global for sparse columns */ 7630 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7631 } 7632 ncoo_d += nz; 7633 } 7634 } 7635 } 7636 } 7637 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7638 PetscCall(ISDestroy(&glob)); 7639 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7640 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7641 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7642 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7643 7644 /* preallocate with COO data */ 7645 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7646 PetscCall(PetscFree2(coo_i, coo_j)); 7647 PetscFunctionReturn(PETSC_SUCCESS); 7648 } 7649 7650 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7651 { 7652 Mat_Product *product = mat->product; 7653 #if defined(PETSC_HAVE_DEVICE) 7654 PetscBool match = PETSC_FALSE; 7655 PetscBool usecpu = PETSC_FALSE; 7656 #else 7657 PetscBool match = PETSC_TRUE; 7658 #endif 7659 7660 PetscFunctionBegin; 7661 MatCheckProduct(mat, 1); 7662 #if defined(PETSC_HAVE_DEVICE) 7663 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7664 if (match) { /* we can always fallback to the CPU if requested */ 7665 switch (product->type) { 7666 case MATPRODUCT_AB: 7667 if (product->api_user) { 7668 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7669 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7670 
PetscOptionsEnd(); 7671 } else { 7672 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7673 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7674 PetscOptionsEnd(); 7675 } 7676 break; 7677 case MATPRODUCT_AtB: 7678 if (product->api_user) { 7679 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7680 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7681 PetscOptionsEnd(); 7682 } else { 7683 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7684 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7685 PetscOptionsEnd(); 7686 } 7687 break; 7688 case MATPRODUCT_PtAP: 7689 if (product->api_user) { 7690 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7691 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7692 PetscOptionsEnd(); 7693 } else { 7694 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7695 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7696 PetscOptionsEnd(); 7697 } 7698 break; 7699 default: 7700 break; 7701 } 7702 match = (PetscBool)!usecpu; 7703 } 7704 #endif 7705 if (match) { 7706 switch (product->type) { 7707 case MATPRODUCT_AB: 7708 case MATPRODUCT_AtB: 7709 case MATPRODUCT_PtAP: 7710 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7711 break; 7712 default: 7713 break; 7714 } 7715 } 7716 /* fallback to MPIAIJ ops */ 7717 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7718 PetscFunctionReturn(PETSC_SUCCESS); 7719 } 7720 7721 /* 7722 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7723 7724 n - the number of block indices in cc[] 7725 cc - the block indices (must be large enough to contain the indices) 7726 */ 7727 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7728 { 7729 PetscInt cnt = -1, nidx, j; 7730 const PetscInt *idx; 7731 7732 PetscFunctionBegin; 7733 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7734 if (nidx) { 7735 cnt = 0; 7736 cc[cnt] = idx[0] / bs; 7737 for (j = 1; j < nidx; j++) { 7738 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7739 } 7740 } 7741 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7742 *n = cnt + 1; 7743 PetscFunctionReturn(PETSC_SUCCESS); 7744 } 7745 7746 /* 7747 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7748 7749 ncollapsed - the number of block indices 7750 collapsed - the block indices (must be large enough to contain the indices) 7751 */ 7752 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7753 { 7754 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7755 7756 PetscFunctionBegin; 7757 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7758 for (i = start + 1; i < start + bs; i++) { 7759 
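/* collapse row i to its block-column indices, then merge them into the set accumulated from the previous rows of this block row */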
PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7760 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7761 cprevtmp = cprev; 7762 cprev = merged; 7763 merged = cprevtmp; 7764 } 7765 *ncollapsed = nprev; 7766 if (collapsed) *collapsed = cprev; 7767 PetscFunctionReturn(PETSC_SUCCESS); 7768 } 7769 7770 /* 7771 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7772 7773 Input Parameter: 7774 . Amat - matrix 7775 - symmetrize - make the result symmetric 7776 + scale - scale with diagonal 7777 7778 Output Parameter: 7779 . a_Gmat - output scalar graph >= 0 7780 7781 */ 7782 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7783 { 7784 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7785 MPI_Comm comm; 7786 Mat Gmat; 7787 PetscBool ismpiaij, isseqaij; 7788 Mat a, b, c; 7789 MatType jtype; 7790 7791 PetscFunctionBegin; 7792 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7793 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7794 PetscCall(MatGetSize(Amat, &MM, &NN)); 7795 PetscCall(MatGetBlockSize(Amat, &bs)); 7796 nloc = (Iend - Istart) / bs; 7797 7798 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7799 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7800 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7801 7802 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7803 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7804 implementation */ 7805 if (bs > 1) { 7806 PetscCall(MatGetType(Amat, &jtype)); 7807 PetscCall(MatCreate(comm, &Gmat)); 7808 PetscCall(MatSetType(Gmat, jtype)); 7809 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7810 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7811 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7812 PetscInt *d_nnz, *o_nnz; 7813 MatScalar *aa, val, *AA; 7814 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7815 7816 if (isseqaij) { 7817 a = Amat; 7818 b = NULL; 7819 } else { 7820 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7821 a = d->A; 7822 b = d->B; 7823 } 7824 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7825 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7826 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7827 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7828 const PetscInt *cols1, *cols2; 7829 7830 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7831 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7832 nnz[brow / bs] = nc2 / bs; 7833 if (nc2 % bs) ok = 0; 7834 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7835 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7836 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7837 if (nc1 != nc2) ok = 0; 7838 else { 7839 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7840 if (cols1[jj] != cols2[jj]) ok = 0; 7841 if (cols1[jj] % bs != jj % bs) ok = 0; 7842 } 7843 } 7844 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7845 } 7846 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7847 if (!ok) { 7848 PetscCall(PetscFree2(d_nnz, o_nnz)); 7849 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7850 goto old_bs; 7851 } 7852 } 7853 } 7854 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7855 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7856 PetscCall(PetscFree2(d_nnz, o_nnz)); 7857 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7858 // diag 7859 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7860 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7861 7862 ai = aseq->i; 7863 n = ai[brow + 1] - ai[brow]; 7864 aj = aseq->j + ai[brow]; 7865 for (PetscInt k = 0; k < n; k += bs) { // block columns 7866 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7867 val = 0; 7868 if (index_size == 0) { 7869 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7870 aa = aseq->a + ai[brow + ii] + k; 7871 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7872 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7873 } 7874 } 7875 } else { // use (index,index) value if provided 7876 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7877 PetscInt ii = index[iii]; 7878 aa = aseq->a + ai[brow + ii] + k; 7879 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7880 PetscInt jj = index[jjj]; 7881 val += PetscAbs(PetscRealPart(aa[jj])); 7882 } 7883 } 7884 } 7885 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7886 AA[k / bs] = val; 7887 } 7888 grow = Istart / bs + brow / bs; 7889 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7890 } 7891 // off-diag 7892 if (ismpiaij) { 7893 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7894 const PetscScalar *vals; 7895 const PetscInt *cols, *garray = aij->garray; 7896 7897 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7898 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7899 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7900 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7901 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7902 AA[k / bs] = 0; 7903 AJ[cidx] = garray[cols[k]] / bs; 7904 } 7905 nc = ncols / bs; 7906 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7907 if (index_size == 0) { 7908 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7909 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7910 for (PetscInt k = 0; k < ncols; k += bs) { 7911 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7912 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7913 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + 
jj])); 7914 } 7915 } 7916 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7917 } 7918 } else { // use (index,index) value if provided 7919 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7920 PetscInt ii = index[iii]; 7921 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7922 for (PetscInt k = 0; k < ncols; k += bs) { 7923 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7924 PetscInt jj = index[jjj]; 7925 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7926 } 7927 } 7928 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7929 } 7930 } 7931 grow = Istart / bs + brow / bs; 7932 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7933 } 7934 } 7935 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7936 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7937 PetscCall(PetscFree2(AA, AJ)); 7938 } else { 7939 const PetscScalar *vals; 7940 const PetscInt *idx; 7941 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7942 old_bs: 7943 /* 7944 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7945 */ 7946 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7947 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7948 if (isseqaij) { 7949 PetscInt max_d_nnz; 7950 7951 /* 7952 Determine exact preallocation count for (sequential) scalar matrix 7953 */ 7954 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7955 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7956 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7957 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7958 PetscCall(PetscFree3(w0, w1, w2)); 7959 } else if (ismpiaij) { 7960 Mat Daij, Oaij; 7961 const PetscInt *garray; 7962 PetscInt max_d_nnz; 7963 7964 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7965 /* 7966 Determine exact preallocation count for diagonal block portion of scalar matrix 7967 */ 7968 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7969 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7970 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7971 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7972 PetscCall(PetscFree3(w0, w1, w2)); 7973 /* 7974 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7975 */ 7976 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7977 o_nnz[jj] = 0; 7978 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7979 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7980 o_nnz[jj] += ncols; 7981 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7982 } 7983 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7984 } 7985 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7986 /* get scalar copy (norms) of matrix */ 7987 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7988 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7989 PetscCall(PetscFree2(d_nnz, o_nnz)); 7990 for (Ii = Istart; Ii < Iend; Ii++) { 7991 PetscInt dest_row = Ii / bs; 7992 7993 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7994 for (jj = 0; jj < ncols; jj++) { 7995 PetscInt dest_col = idx[jj] / bs; 7996 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7997 7998 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, 
ADD_VALUES)); 7999 } 8000 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 8001 } 8002 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8003 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8004 } 8005 } else { 8006 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8007 else { 8008 Gmat = Amat; 8009 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8010 } 8011 if (isseqaij) { 8012 a = Gmat; 8013 b = NULL; 8014 } else { 8015 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8016 a = d->A; 8017 b = d->B; 8018 } 8019 if (filter >= 0 || scale) { 8020 /* take absolute value of each entry */ 8021 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8022 MatInfo info; 8023 PetscScalar *avals; 8024 8025 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8026 PetscCall(MatSeqAIJGetArray(c, &avals)); 8027 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8028 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8029 } 8030 } 8031 } 8032 if (symmetrize) { 8033 PetscBool isset, issym; 8034 8035 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8036 if (!isset || !issym) { 8037 Mat matTrans; 8038 8039 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8040 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8041 PetscCall(MatDestroy(&matTrans)); 8042 } 8043 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8044 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8045 if (scale) { 8046 /* scale c for all diagonal values = 1 or -1 */ 8047 Vec diag; 8048 8049 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8050 PetscCall(MatGetDiagonal(Gmat, diag)); 8051 PetscCall(VecReciprocal(diag)); 8052 PetscCall(VecSqrtAbs(diag)); 8053 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8054 PetscCall(VecDestroy(&diag)); 8055 } 8056 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8057 if (filter >= 0) { 8058 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8059 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8060 } 8061 *a_Gmat = Gmat; 8062 PetscFunctionReturn(PETSC_SUCCESS); 8063 } 8064 8065 /* 8066 Special version for direct calls from Fortran 8067 */ 8068 8069 /* Change these macros so can be used in void function */ 8070 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8071 #undef PetscCall 8072 #define PetscCall(...) \ 8073 do { \ 8074 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8075 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8076 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8077 return; \ 8078 } \ 8079 } while (0) 8080 8081 #undef SETERRQ 8082 #define SETERRQ(comm, ierr, ...) 
\ 8083 do { \ 8084 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8085 return; \ 8086 } while (0) 8087 8088 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8089 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8090 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8091 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8092 #else 8093 #endif 8094 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8095 { 8096 Mat mat = *mmat; 8097 PetscInt m = *mm, n = *mn; 8098 InsertMode addv = *maddv; 8099 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8100 PetscScalar value; 8101 8102 MatCheckPreallocated(mat, 1); 8103 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8104 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8105 { 8106 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8107 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8108 PetscBool roworiented = aij->roworiented; 8109 8110 /* Some Variables required in the macro */ 8111 Mat A = aij->A; 8112 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8113 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8114 MatScalar *aa; 8115 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8116 Mat B = aij->B; 8117 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8118 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8119 MatScalar *ba; 8120 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8121 * cannot use "#if defined" inside a macro. 
*/ 8122 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8123 8124 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8125 PetscInt nonew = a->nonew; 8126 MatScalar *ap1, *ap2; 8127 8128 PetscFunctionBegin; 8129 PetscCall(MatSeqAIJGetArray(A, &aa)); 8130 PetscCall(MatSeqAIJGetArray(B, &ba)); 8131 for (i = 0; i < m; i++) { 8132 if (im[i] < 0) continue; 8133 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8134 if (im[i] >= rstart && im[i] < rend) { 8135 row = im[i] - rstart; 8136 lastcol1 = -1; 8137 rp1 = aj + ai[row]; 8138 ap1 = aa + ai[row]; 8139 rmax1 = aimax[row]; 8140 nrow1 = ailen[row]; 8141 low1 = 0; 8142 high1 = nrow1; 8143 lastcol2 = -1; 8144 rp2 = bj + bi[row]; 8145 ap2 = ba + bi[row]; 8146 rmax2 = bimax[row]; 8147 nrow2 = bilen[row]; 8148 low2 = 0; 8149 high2 = nrow2; 8150 8151 for (j = 0; j < n; j++) { 8152 if (roworiented) value = v[i * n + j]; 8153 else value = v[i + j * m]; 8154 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8155 if (in[j] >= cstart && in[j] < cend) { 8156 col = in[j] - cstart; 8157 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8158 } else if (in[j] < 0) continue; 8159 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8160 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8161 } else { 8162 if (mat->was_assembled) { 8163 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8164 #if defined(PETSC_USE_CTABLE) 8165 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8166 col--; 8167 #else 8168 col = aij->colmap[in[j]] - 1; 8169 #endif 8170 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8171 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8172 col = in[j]; 8173 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8174 B = aij->B; 8175 b = (Mat_SeqAIJ *)B->data; 8176 bimax = b->imax; 8177 bi = b->i; 8178 bilen = b->ilen; 8179 bj = b->j; 8180 rp2 = bj + bi[row]; 8181 ap2 = ba + bi[row]; 8182 rmax2 = bimax[row]; 8183 nrow2 = bilen[row]; 8184 low2 = 0; 8185 high2 = nrow2; 8186 bm = aij->B->rmap->n; 8187 ba = b->a; 8188 inserted = PETSC_FALSE; 8189 } 8190 } else col = in[j]; 8191 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8192 } 8193 } 8194 } else if (!aij->donotstash) { 8195 if (roworiented) { 8196 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8197 } else { 8198 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8199 } 8200 } 8201 } 8202 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8203 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8204 } 8205 PetscFunctionReturnVoid(); 8206 } 8207 8208 /* Undefining these here since they were redefined from their original definition above! No 8209 * other PETSc functions should be defined past this point, as it is impossible to recover the 8210 * original definitions */ 8211 #undef PetscCall 8212 #undef SETERRQ 8213