1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(VecDestroy(&aij->diag)); 17 PetscCall(MatDestroy(&aij->A)); 18 PetscCall(MatDestroy(&aij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&aij->colmap)); 21 #else 22 PetscCall(PetscFree(aij->colmap)); 23 #endif 24 PetscCall(PetscFree(aij->garray)); 25 PetscCall(VecDestroy(&aij->lvec)); 26 PetscCall(VecScatterDestroy(&aij->Mvctx)); 27 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 28 PetscCall(PetscFree(aij->ld)); 29 30 PetscCall(PetscFree(mat->data)); 31 32 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 33 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 34 35 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 45 #if defined(PETSC_HAVE_CUDA) 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 47 #endif 48 #if defined(PETSC_HAVE_HIP) 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 50 #endif 51 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 53 #endif 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 55 #if defined(PETSC_HAVE_ELEMENTAL) 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 57 #endif 58 #if defined(PETSC_HAVE_SCALAPACK) 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 60 #endif 61 #if defined(PETSC_HAVE_HYPRE) 62 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 63 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 64 #endif 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 68 
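  /* Illustrative sketch: the "..._C" strings composed above are method slots attached to the object.
     Generic code such as MatConvert() looks a slot up at run time before dispatching, roughly as below
     (the queried name is one of those cleared in this routine; the variable newmat is hypothetical):

       Mat newmat;
       PetscErrorCode (*conv)(Mat, MatType, MatReuse, Mat *) = NULL;
       PetscCall(PetscObjectQueryFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", &conv));
       if (conv) PetscCall((*conv)(mat, MATMPIDENSE, MAT_INITIAL_MATRIX, &newmat));

     Composing NULL for each slot here removes it, so the destroyed matrix leaves no dangling function pointers. */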
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 71 #if defined(PETSC_HAVE_MKL_SPARSE) 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 73 #endif 74 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 83 #define TYPE AIJ 84 #define TYPE_AIJ 85 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 86 #undef TYPE 87 #undef TYPE_AIJ 88 89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 96 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 107 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 108 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /*MC 113 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 114 115 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 116 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 117 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 118 for communicators controlling multiple processes. It is recommended that you call both of 119 the above preallocation routines for simplicity. 120 121 Options Database Key: 122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 123 124 Developer Note: 125 Level: beginner 126 127 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 128 enough exist. 129 130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 131 M*/ 132 133 /*MC 134 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 135 136 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 137 and `MATMPIAIJCRL` otherwise. 
As a result, for single process communicators, 138 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 139 for communicators controlling multiple processes. It is recommended that you call both of 140 the above preallocation routines for simplicity. 141 142 Options Database Key: 143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 144 145 Level: beginner 146 147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 148 M*/ 149 150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 151 { 152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 153 154 PetscFunctionBegin; 155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 156 A->boundtocpu = flg; 157 #endif 158 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 159 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 160 161 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 162 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 163 * to differ from the parent matrix. */ 164 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 165 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 170 { 171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 172 173 PetscFunctionBegin; 174 if (mat->A) { 175 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 176 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 182 { 183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 185 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 186 const PetscInt *ia, *ib; 187 const MatScalar *aa, *bb, *aav, *bav; 188 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 189 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 190 191 PetscFunctionBegin; 192 *keptrows = NULL; 193 194 ia = a->i; 195 ib = b->i; 196 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 197 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 198 for (i = 0; i < m; i++) { 199 na = ia[i + 1] - ia[i]; 200 nb = ib[i + 1] - ib[i]; 201 if (!na && !nb) { 202 cnt++; 203 goto ok1; 204 } 205 aa = aav + ia[i]; 206 for (j = 0; j < na; j++) { 207 if (aa[j] != 0.0) goto ok1; 208 } 209 bb = PetscSafePointerPlusOffset(bav, ib[i]); 210 for (j = 0; j < nb; j++) { 211 if (bb[j] != 0.0) goto ok1; 212 } 213 cnt++; 214 ok1:; 215 } 216 PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 217 if (!n0rows) { 218 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 223 cnt = 0; 224 for (i = 0; i < m; i++) { 225 na = ia[i + 1] - ia[i]; 226 nb = ib[i + 1] - ib[i]; 227 if (!na && !nb) continue; 228 aa = aav + ia[i]; 229 for (j = 0; j < na; j++) { 230 if (aa[j] != 0.0) { 231 rows[cnt++] = rstart + i; 232 goto ok2; 233 } 234 } 235 bb = PetscSafePointerPlusOffset(bav, ib[i]); 236 for (j = 0; j < nb; j++) { 237 if (bb[j] != 0.0) { 238 rows[cnt++] = rstart + i; 239 goto ok2; 240 } 241 } 242 ok2:; 243 } 244 
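  /* Illustrative usage sketch: from user code this functionality is reached through MatFindNonzeroRows(),
     which returns an IS of the owned rows containing at least one stored nonzero value (or NULL when every
     row does). A typical use, assuming a parallel matrix M (a NULL column IS requests all columns):

       IS  keptrows;
       Mat Msub;
       PetscCall(MatFindNonzeroRows(M, &keptrows));
       if (keptrows) {
         PetscCall(MatCreateSubMatrix(M, keptrows, NULL, MAT_INITIAL_MATRIX, &Msub));
         PetscCall(ISDestroy(&keptrows));
         PetscCall(MatDestroy(&Msub));
       }
  */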
PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 245 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 246 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 251 { 252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 253 PetscBool cong; 254 255 PetscFunctionBegin; 256 PetscCall(MatHasCongruentLayouts(Y, &cong)); 257 if (Y->assembled && cong) { 258 PetscCall(MatDiagonalSet(aij->A, D, is)); 259 } else { 260 PetscCall(MatDiagonalSet_Default(Y, D, is)); 261 } 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 266 { 267 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 268 PetscInt i, rstart, nrows, *rows; 269 270 PetscFunctionBegin; 271 *zrows = NULL; 272 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 273 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 274 for (i = 0; i < nrows; i++) rows[i] += rstart; 275 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 280 { 281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 282 PetscInt i, m, n, *garray = aij->garray; 283 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 284 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 285 PetscReal *work; 286 const PetscScalar *dummy; 287 288 PetscFunctionBegin; 289 PetscCall(MatGetSize(A, &m, &n)); 290 PetscCall(PetscCalloc1(n, &work)); 291 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 292 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 293 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 294 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 295 if (type == NORM_2) { 296 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 297 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 298 } else if (type == NORM_1) { 299 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 300 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 301 } else if (type == NORM_INFINITY) { 302 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 303 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 304 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 305 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 306 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 307 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 308 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 309 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 310 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 311 
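  /* Illustrative note: at this point each rank has accumulated its local contribution for every global
     column in work[] -- diagonal-block entries shifted by A->cmap->rstart, off-diagonal entries mapped
     through garray -- so a single reduction over the communicator (below) yields the global result.
     From user code this path is normally reached through MatGetColumnNorms(), e.g. (a minimal sketch):

       PetscInt   N;
       PetscReal *norms;
       PetscCall(MatGetSize(A, NULL, &N));
       PetscCall(PetscMalloc1(N, &norms));
       PetscCall(MatGetColumnNorms(A, NORM_2, norms)); // every rank receives all N column norms
       PetscCall(PetscFree(norms));
  */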
if (type == NORM_INFINITY) { 312 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 313 } else { 314 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 315 } 316 PetscCall(PetscFree(work)); 317 if (type == NORM_2) { 318 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 319 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 320 for (i = 0; i < n; i++) reductions[i] /= m; 321 } 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 326 { 327 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 328 IS sis, gis; 329 const PetscInt *isis, *igis; 330 PetscInt n, *iis, nsis, ngis, rstart, i; 331 332 PetscFunctionBegin; 333 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 334 PetscCall(MatFindNonzeroRows(a->B, &gis)); 335 PetscCall(ISGetSize(gis, &ngis)); 336 PetscCall(ISGetSize(sis, &nsis)); 337 PetscCall(ISGetIndices(sis, &isis)); 338 PetscCall(ISGetIndices(gis, &igis)); 339 340 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 341 PetscCall(PetscArraycpy(iis, igis, ngis)); 342 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 343 n = ngis + nsis; 344 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 345 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 346 for (i = 0; i < n; i++) iis[i] += rstart; 347 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 348 349 PetscCall(ISRestoreIndices(sis, &isis)); 350 PetscCall(ISRestoreIndices(gis, &igis)); 351 PetscCall(ISDestroy(&sis)); 352 PetscCall(ISDestroy(&gis)); 353 PetscFunctionReturn(PETSC_SUCCESS); 354 } 355 356 /* 357 Local utility routine that creates a mapping from the global column 358 number to the local number in the off-diagonal part of the local 359 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 360 a slightly higher hash table cost; without it it is not scalable (each processor 361 has an order N integer array but is fast to access. 
362 */ 363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 364 { 365 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 366 PetscInt n = aij->B->cmap->n, i; 367 368 PetscFunctionBegin; 369 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 370 #if defined(PETSC_USE_CTABLE) 371 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 372 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 373 #else 374 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 375 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 376 #endif 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 381 do { \ 382 if (col <= lastcol1) low1 = 0; \ 383 else high1 = nrow1; \ 384 lastcol1 = col; \ 385 while (high1 - low1 > 5) { \ 386 t = (low1 + high1) / 2; \ 387 if (rp1[t] > col) high1 = t; \ 388 else low1 = t; \ 389 } \ 390 for (_i = low1; _i < high1; _i++) { \ 391 if (rp1[_i] > col) break; \ 392 if (rp1[_i] == col) { \ 393 if (addv == ADD_VALUES) { \ 394 ap1[_i] += value; \ 395 /* Not sure LogFlops will slow dow the code or not */ \ 396 (void)PetscLogFlops(1.0); \ 397 } else ap1[_i] = value; \ 398 goto a_noinsert; \ 399 } \ 400 } \ 401 if (value == 0.0 && ignorezeroentries && row != col) { \ 402 low1 = 0; \ 403 high1 = nrow1; \ 404 goto a_noinsert; \ 405 } \ 406 if (nonew == 1) { \ 407 low1 = 0; \ 408 high1 = nrow1; \ 409 goto a_noinsert; \ 410 } \ 411 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 412 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 413 N = nrow1++ - 1; \ 414 a->nz++; \ 415 high1++; \ 416 /* shift up all the later entries in this row */ \ 417 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 418 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 419 rp1[_i] = col; \ 420 ap1[_i] = value; \ 421 a_noinsert:; \ 422 ailen[row] = nrow1; \ 423 } while (0) 424 425 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 426 do { \ 427 if (col <= lastcol2) low2 = 0; \ 428 else high2 = nrow2; \ 429 lastcol2 = col; \ 430 while (high2 - low2 > 5) { \ 431 t = (low2 + high2) / 2; \ 432 if (rp2[t] > col) high2 = t; \ 433 else low2 = t; \ 434 } \ 435 for (_i = low2; _i < high2; _i++) { \ 436 if (rp2[_i] > col) break; \ 437 if (rp2[_i] == col) { \ 438 if (addv == ADD_VALUES) { \ 439 ap2[_i] += value; \ 440 (void)PetscLogFlops(1.0); \ 441 } else ap2[_i] = value; \ 442 goto b_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) { \ 446 low2 = 0; \ 447 high2 = nrow2; \ 448 goto b_noinsert; \ 449 } \ 450 if (nonew == 1) { \ 451 low2 = 0; \ 452 high2 = nrow2; \ 453 goto b_noinsert; \ 454 } \ 455 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 456 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 457 N = nrow2++ - 1; \ 458 b->nz++; \ 459 high2++; \ 460 /* shift up all the later entries in this row */ \ 461 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 462 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 463 rp2[_i] = col; \ 464 ap2[_i] = value; \ 465 b_noinsert:; 
\ 466 bilen[row] = nrow2; \ 467 } while (0) 468 469 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 470 { 471 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 472 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 473 PetscInt l, *garray = mat->garray, diag; 474 PetscScalar *aa, *ba; 475 476 PetscFunctionBegin; 477 /* code only works for square matrices A */ 478 479 /* find size of row to the left of the diagonal part */ 480 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 481 row = row - diag; 482 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 483 if (garray[b->j[b->i[row] + l]] > diag) break; 484 } 485 if (l) { 486 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 487 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 488 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 489 } 490 491 /* diagonal part */ 492 if (a->i[row + 1] - a->i[row]) { 493 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 494 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 495 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 496 } 497 498 /* right of diagonal part */ 499 if (b->i[row + 1] - b->i[row] - l) { 500 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 501 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 502 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 503 } 504 PetscFunctionReturn(PETSC_SUCCESS); 505 } 506 507 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 508 { 509 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 510 PetscScalar value = 0.0; 511 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 512 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 513 PetscBool roworiented = aij->roworiented; 514 515 /* Some Variables required in the macro */ 516 Mat A = aij->A; 517 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 518 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 519 PetscBool ignorezeroentries = a->ignorezeroentries; 520 Mat B = aij->B; 521 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 522 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 523 MatScalar *aa, *ba; 524 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 525 PetscInt nonew; 526 MatScalar *ap1, *ap2; 527 528 PetscFunctionBegin; 529 PetscCall(MatSeqAIJGetArray(A, &aa)); 530 PetscCall(MatSeqAIJGetArray(B, &ba)); 531 for (i = 0; i < m; i++) { 532 if (im[i] < 0) continue; 533 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 534 if (im[i] >= rstart && im[i] < rend) { 535 row = im[i] - rstart; 536 lastcol1 = -1; 537 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 538 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 539 rmax1 = aimax[row]; 540 nrow1 = ailen[row]; 541 low1 = 0; 542 high1 = nrow1; 543 lastcol2 = -1; 544 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 545 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 546 rmax2 = bimax[row]; 547 nrow2 = bilen[row]; 548 low2 = 0; 549 high2 = nrow2; 550 551 for (j = 0; j < n; j++) { 552 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m]; 553 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 554 if (in[j] >= cstart && in[j] < cend) { 555 col = in[j] - cstart; 556 nonew = a->nonew; 557 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 558 } else if (in[j] < 0) { 559 continue; 560 } else { 561 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 562 if (mat->was_assembled) { 563 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 564 #if defined(PETSC_USE_CTABLE) 565 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 566 col--; 567 #else 568 col = aij->colmap[in[j]] - 1; 569 #endif 570 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 571 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 572 col = in[j]; 573 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 574 B = aij->B; 575 b = (Mat_SeqAIJ *)B->data; 576 bimax = b->imax; 577 bi = b->i; 578 bilen = b->ilen; 579 bj = b->j; 580 ba = b->a; 581 rp2 = bj + bi[row]; 582 ap2 = ba + bi[row]; 583 rmax2 = bimax[row]; 584 nrow2 = bilen[row]; 585 low2 = 0; 586 high2 = nrow2; 587 bm = aij->B->rmap->n; 588 ba = b->a; 589 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 590 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 591 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 592 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 593 } 594 } else col = in[j]; 595 nonew = b->nonew; 596 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 597 } 598 } 599 } else { 600 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 601 if (!aij->donotstash) { 602 mat->assembled = PETSC_FALSE; 603 if (roworiented) { 604 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 605 } else { 606 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 607 } 608 } 609 } 610 } 611 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 612 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 613 PetscFunctionReturn(PETSC_SUCCESS); 614 } 615 616 /* 617 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 618 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 619 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
620 */ 621 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 622 { 623 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 624 Mat A = aij->A; /* diagonal part of the matrix */ 625 Mat B = aij->B; /* off-diagonal part of the matrix */ 626 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 627 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 628 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 629 PetscInt *ailen = a->ilen, *aj = a->j; 630 PetscInt *bilen = b->ilen, *bj = b->j; 631 PetscInt am = aij->A->rmap->n, j; 632 PetscInt diag_so_far = 0, dnz; 633 PetscInt offd_so_far = 0, onz; 634 635 PetscFunctionBegin; 636 /* Iterate over all rows of the matrix */ 637 for (j = 0; j < am; j++) { 638 dnz = onz = 0; 639 /* Iterate over all non-zero columns of the current row */ 640 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 641 /* If column is in the diagonal */ 642 if (mat_j[col] >= cstart && mat_j[col] < cend) { 643 aj[diag_so_far++] = mat_j[col] - cstart; 644 dnz++; 645 } else { /* off-diagonal entries */ 646 bj[offd_so_far++] = mat_j[col]; 647 onz++; 648 } 649 } 650 ailen[j] = dnz; 651 bilen[j] = onz; 652 } 653 PetscFunctionReturn(PETSC_SUCCESS); 654 } 655 656 /* 657 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 658 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 659 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 660 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 661 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 662 */ 663 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 664 { 665 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 666 Mat A = aij->A; /* diagonal part of the matrix */ 667 Mat B = aij->B; /* off-diagonal part of the matrix */ 668 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 669 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 670 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 671 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 672 PetscInt *ailen = a->ilen, *aj = a->j; 673 PetscInt *bilen = b->ilen, *bj = b->j; 674 PetscInt am = aij->A->rmap->n, j; 675 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 676 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 677 PetscScalar *aa = a->a, *ba = b->a; 678 679 PetscFunctionBegin; 680 /* Iterate over all rows of the matrix */ 681 for (j = 0; j < am; j++) { 682 dnz_row = onz_row = 0; 683 rowstart_offd = full_offd_i[j]; 684 rowstart_diag = full_diag_i[j]; 685 /* Iterate over all non-zero columns of the current row */ 686 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 687 /* If column is in the diagonal */ 688 if (mat_j[col] >= cstart && mat_j[col] < cend) { 689 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 690 aa[rowstart_diag + dnz_row] = mat_a[col]; 691 dnz_row++; 692 } else { /* off-diagonal entries */ 693 bj[rowstart_offd + onz_row] = mat_j[col]; 694 ba[rowstart_offd + onz_row] = mat_a[col]; 695 onz_row++; 696 } 697 } 698 ailen[j] = dnz_row; 699 bilen[j] = onz_row; 700 } 701 PetscFunctionReturn(PETSC_SUCCESS); 702 } 703 704 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 705 { 706 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 707 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 708 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 709 710 PetscFunctionBegin; 711 for (i = 0; i < m; i++) { 712 if (idxm[i] < 0) continue; /* negative row */ 713 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 714 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 715 row = idxm[i] - rstart; 716 for (j = 0; j < n; j++) { 717 if (idxn[j] < 0) continue; /* negative column */ 718 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 719 if (idxn[j] >= cstart && idxn[j] < cend) { 720 col = idxn[j] - cstart; 721 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 722 } else { 723 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 724 #if defined(PETSC_USE_CTABLE) 725 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 726 col--; 727 #else 728 col = aij->colmap[idxn[j]] - 1; 729 #endif 730 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 731 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 732 } 733 } 734 } 735 PetscFunctionReturn(PETSC_SUCCESS); 736 } 737 738 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 739 { 740 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 741 PetscInt nstash, reallocs; 742 743 PetscFunctionBegin; 744 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 745 746 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 747 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 748 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 749 PetscFunctionReturn(PETSC_SUCCESS); 750 } 751 752 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 753 { 754 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 755 PetscMPIInt n; 756 PetscInt i, j, rstart, ncols, flg; 757 PetscInt *row, *col; 758 PetscBool other_disassembled; 759 PetscScalar *val; 760 761 /* do not use 'b 
= (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 762 763 PetscFunctionBegin; 764 if (!aij->donotstash && !mat->nooffprocentries) { 765 while (1) { 766 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 767 if (!flg) break; 768 769 for (i = 0; i < n;) { 770 /* Now identify the consecutive vals belonging to the same row */ 771 for (j = i, rstart = row[j]; j < n; j++) { 772 if (row[j] != rstart) break; 773 } 774 if (j < n) ncols = j - i; 775 else ncols = n - i; 776 /* Now assemble all these values with a single function call */ 777 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 778 i = j; 779 } 780 } 781 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 782 } 783 #if defined(PETSC_HAVE_DEVICE) 784 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 785 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 786 if (mat->boundtocpu) { 787 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 788 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 789 } 790 #endif 791 PetscCall(MatAssemblyBegin(aij->A, mode)); 792 PetscCall(MatAssemblyEnd(aij->A, mode)); 793 794 /* determine if any processor has disassembled, if so we must 795 also disassemble ourself, in order that we may reassemble. */ 796 /* 797 if nonzero structure of submatrix B cannot change then we know that 798 no processor disassembled thus we can skip this stuff 799 */ 800 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 801 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 802 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 803 PetscCall(MatDisAssemble_MPIAIJ(mat)); 804 } 805 } 806 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 807 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 808 #if defined(PETSC_HAVE_DEVICE) 809 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 810 #endif 811 PetscCall(MatAssemblyBegin(aij->B, mode)); 812 PetscCall(MatAssemblyEnd(aij->B, mode)); 813 814 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 815 816 aij->rowvalues = NULL; 817 818 PetscCall(VecDestroy(&aij->diag)); 819 820 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 821 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 822 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 823 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 824 } 825 #if defined(PETSC_HAVE_DEVICE) 826 mat->offloadmask = PETSC_OFFLOAD_BOTH; 827 #endif 828 PetscFunctionReturn(PETSC_SUCCESS); 829 } 830 831 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 832 { 833 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 834 835 PetscFunctionBegin; 836 PetscCall(MatZeroEntries(l->A)); 837 PetscCall(MatZeroEntries(l->B)); 838 PetscFunctionReturn(PETSC_SUCCESS); 839 } 840 841 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 842 { 843 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 844 PetscInt *lrows; 845 PetscInt r, len; 846 PetscBool cong; 847 848 
PetscFunctionBegin; 849 /* get locally owned rows */ 850 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 851 PetscCall(MatHasCongruentLayouts(A, &cong)); 852 /* fix right-hand side if needed */ 853 if (x && b) { 854 const PetscScalar *xx; 855 PetscScalar *bb; 856 857 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 858 PetscCall(VecGetArrayRead(x, &xx)); 859 PetscCall(VecGetArray(b, &bb)); 860 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 861 PetscCall(VecRestoreArrayRead(x, &xx)); 862 PetscCall(VecRestoreArray(b, &bb)); 863 } 864 865 if (diag != 0.0 && cong) { 866 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 867 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 868 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 869 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 870 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 871 PetscInt nnwA, nnwB; 872 PetscBool nnzA, nnzB; 873 874 nnwA = aijA->nonew; 875 nnwB = aijB->nonew; 876 nnzA = aijA->keepnonzeropattern; 877 nnzB = aijB->keepnonzeropattern; 878 if (!nnzA) { 879 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 880 aijA->nonew = 0; 881 } 882 if (!nnzB) { 883 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 884 aijB->nonew = 0; 885 } 886 /* Must zero here before the next loop */ 887 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 888 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 889 for (r = 0; r < len; ++r) { 890 const PetscInt row = lrows[r] + A->rmap->rstart; 891 if (row >= A->cmap->N) continue; 892 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 893 } 894 aijA->nonew = nnwA; 895 aijB->nonew = nnwB; 896 } else { 897 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 898 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 899 } 900 PetscCall(PetscFree(lrows)); 901 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 902 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 903 904 /* only change matrix nonzero state if pattern was allowed to be changed */ 905 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 906 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 907 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 908 } 909 PetscFunctionReturn(PETSC_SUCCESS); 910 } 911 912 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 913 { 914 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 915 PetscMPIInt n = A->rmap->n; 916 PetscInt i, j, r, m, len = 0; 917 PetscInt *lrows, *owners = A->rmap->range; 918 PetscMPIInt p = 0; 919 PetscSFNode *rrows; 920 PetscSF sf; 921 const PetscScalar *xx; 922 PetscScalar *bb, *mask, *aij_a; 923 Vec xmask, lmask; 924 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 925 const PetscInt *aj, *ii, *ridx; 926 PetscScalar *aa; 927 928 PetscFunctionBegin; 929 /* Create SF where leaves are input rows and roots are owned rows */ 930 PetscCall(PetscMalloc1(n, &lrows)); 931 for (r = 0; r < n; ++r) lrows[r] = -1; 932 PetscCall(PetscMalloc1(N, &rrows)); 933 for (r = 0; r < N; ++r) { 934 const PetscInt idx = 
rows[r]; 935 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 936 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 937 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 938 } 939 rrows[r].rank = p; 940 rrows[r].index = rows[r] - owners[p]; 941 } 942 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 943 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 944 /* Collect flags for rows to be zeroed */ 945 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 946 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 947 PetscCall(PetscSFDestroy(&sf)); 948 /* Compress and put in row numbers */ 949 for (r = 0; r < n; ++r) 950 if (lrows[r] >= 0) lrows[len++] = r; 951 /* zero diagonal part of matrix */ 952 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 953 /* handle off-diagonal part of matrix */ 954 PetscCall(MatCreateVecs(A, &xmask, NULL)); 955 PetscCall(VecDuplicate(l->lvec, &lmask)); 956 PetscCall(VecGetArray(xmask, &bb)); 957 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 958 PetscCall(VecRestoreArray(xmask, &bb)); 959 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 960 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 961 PetscCall(VecDestroy(&xmask)); 962 if (x && b) { /* this code is buggy when the row and column layout don't match */ 963 PetscBool cong; 964 965 PetscCall(MatHasCongruentLayouts(A, &cong)); 966 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 967 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 968 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 969 PetscCall(VecGetArrayRead(l->lvec, &xx)); 970 PetscCall(VecGetArray(b, &bb)); 971 } 972 PetscCall(VecGetArray(lmask, &mask)); 973 /* remove zeroed rows of off-diagonal matrix */ 974 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 975 ii = aij->i; 976 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 977 /* loop over all elements of off process part of matrix zeroing removed columns*/ 978 if (aij->compressedrow.use) { 979 m = aij->compressedrow.nrows; 980 ii = aij->compressedrow.i; 981 ridx = aij->compressedrow.rindex; 982 for (i = 0; i < m; i++) { 983 n = ii[i + 1] - ii[i]; 984 aj = aij->j + ii[i]; 985 aa = aij_a + ii[i]; 986 987 for (j = 0; j < n; j++) { 988 if (PetscAbsScalar(mask[*aj])) { 989 if (b) bb[*ridx] -= *aa * xx[*aj]; 990 *aa = 0.0; 991 } 992 aa++; 993 aj++; 994 } 995 ridx++; 996 } 997 } else { /* do not use compressed row format */ 998 m = l->B->rmap->n; 999 for (i = 0; i < m; i++) { 1000 n = ii[i + 1] - ii[i]; 1001 aj = aij->j + ii[i]; 1002 aa = aij_a + ii[i]; 1003 for (j = 0; j < n; j++) { 1004 if (PetscAbsScalar(mask[*aj])) { 1005 if (b) bb[i] -= *aa * xx[*aj]; 1006 *aa = 0.0; 1007 } 1008 aa++; 1009 aj++; 1010 } 1011 } 1012 } 1013 if (x && b) { 1014 PetscCall(VecRestoreArray(b, &bb)); 1015 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1016 } 1017 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1018 PetscCall(VecRestoreArray(lmask, &mask)); 1019 PetscCall(VecDestroy(&lmask)); 1020 PetscCall(PetscFree(lrows)); 1021 1022 /* only change matrix nonzero state if pattern was allowed to be 
changed */ 1023 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1024 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1025 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1026 } 1027 PetscFunctionReturn(PETSC_SUCCESS); 1028 } 1029 1030 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1031 { 1032 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1033 PetscInt nt; 1034 VecScatter Mvctx = a->Mvctx; 1035 1036 PetscFunctionBegin; 1037 PetscCall(VecGetLocalSize(xx, &nt)); 1038 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1039 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1040 PetscUseTypeMethod(a->A, mult, xx, yy); 1041 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1043 PetscFunctionReturn(PETSC_SUCCESS); 1044 } 1045 1046 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1047 { 1048 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1049 1050 PetscFunctionBegin; 1051 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1052 PetscFunctionReturn(PETSC_SUCCESS); 1053 } 1054 1055 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1056 { 1057 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1058 VecScatter Mvctx = a->Mvctx; 1059 1060 PetscFunctionBegin; 1061 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1062 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1063 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1071 1072 PetscFunctionBegin; 1073 /* do nondiagonal part */ 1074 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1075 /* do local part */ 1076 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1077 /* add partial results together */ 1078 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1079 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1080 PetscFunctionReturn(PETSC_SUCCESS); 1081 } 1082 1083 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1084 { 1085 MPI_Comm comm; 1086 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1087 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1088 IS Me, Notme; 1089 PetscInt M, N, first, last, *notme, i; 1090 PetscBool lf; 1091 PetscMPIInt size; 1092 1093 PetscFunctionBegin; 1094 /* Easy test: symmetric diagonal block */ 1095 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1096 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1097 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1098 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1099 PetscCallMPI(MPI_Comm_size(comm, &size)); 1100 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1101 1102 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1103 PetscCall(MatGetSize(Amat, &M, &N)); 1104 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1105 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1106 for (i = 0; i < first; i++) notme[i] = i; 1107 for (i = last; i < M; i++) notme[i - last + first] = i; 1108 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1109 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1110 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1111 Aoff = Aoffs[0]; 1112 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1113 Boff = Boffs[0]; 1114 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1115 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1116 PetscCall(MatDestroyMatrices(1, &Boffs)); 1117 PetscCall(ISDestroy(&Me)); 1118 PetscCall(ISDestroy(&Notme)); 1119 PetscCall(PetscFree(notme)); 1120 PetscFunctionReturn(PETSC_SUCCESS); 1121 } 1122 1123 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1124 { 1125 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1126 1127 PetscFunctionBegin; 1128 /* do nondiagonal part */ 1129 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1130 /* do local part */ 1131 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1132 /* add partial results together */ 1133 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1134 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1135 PetscFunctionReturn(PETSC_SUCCESS); 1136 } 1137 1138 /* 1139 This only works correctly for square matrices where the subblock A->A is the 1140 diagonal block 1141 */ 1142 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1143 { 1144 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1145 1146 PetscFunctionBegin; 1147 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1148 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1149 PetscCall(MatGetDiagonal(a->A, v)); 1150 PetscFunctionReturn(PETSC_SUCCESS); 1151 } 1152 1153 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1154 { 1155 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1156 1157 PetscFunctionBegin; 1158 PetscCall(MatScale(a->A, aa)); 1159 PetscCall(MatScale(a->B, aa)); 1160 PetscFunctionReturn(PETSC_SUCCESS); 1161 } 1162 1163 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1164 { 1165 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1166 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1167 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1168 const PetscInt *garray = aij->garray; 1169 const PetscScalar *aa, *ba; 1170 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1171 PetscInt64 nz, hnz; 1172 PetscInt *rowlens; 1173 PetscInt *colidxs; 1174 PetscScalar *matvals; 1175 PetscMPIInt rank; 1176 1177 PetscFunctionBegin; 1178 PetscCall(PetscViewerSetUp(viewer)); 1179 1180 M = mat->rmap->N; 1181 N = mat->cmap->N; 1182 m = mat->rmap->n; 1183 rs = mat->rmap->rstart; 1184 cs = mat->cmap->rstart; 1185 nz = A->nz + B->nz; 1186 1187 /* write matrix header */ 1188 header[0] = MAT_FILE_CLASSID; 1189 header[1] = M; 1190 header[2] = N; 1191 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1192 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1193 if (rank == 0) { 1194 if 
(hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1195 else header[3] = (PetscInt)hnz; 1196 } 1197 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1198 1199 /* fill in and store row lengths */ 1200 PetscCall(PetscMalloc1(m, &rowlens)); 1201 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1202 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1203 PetscCall(PetscFree(rowlens)); 1204 1205 /* fill in and store column indices */ 1206 PetscCall(PetscMalloc1(nz, &colidxs)); 1207 for (cnt = 0, i = 0; i < m; i++) { 1208 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1209 if (garray[B->j[jb]] > cs) break; 1210 colidxs[cnt++] = garray[B->j[jb]]; 1211 } 1212 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1213 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1214 } 1215 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1216 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1217 PetscCall(PetscFree(colidxs)); 1218 1219 /* fill in and store nonzero values */ 1220 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1221 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1222 PetscCall(PetscMalloc1(nz, &matvals)); 1223 for (cnt = 0, i = 0; i < m; i++) { 1224 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1225 if (garray[B->j[jb]] > cs) break; 1226 matvals[cnt++] = ba[jb]; 1227 } 1228 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1229 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1230 } 1231 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1232 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1233 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1234 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1235 PetscCall(PetscFree(matvals)); 1236 1237 /* write block size option to the viewer's .info file */ 1238 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1239 PetscFunctionReturn(PETSC_SUCCESS); 1240 } 1241 1242 #include <petscdraw.h> 1243 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1244 { 1245 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1246 PetscMPIInt rank = aij->rank, size = aij->size; 1247 PetscBool isdraw, iascii, isbinary; 1248 PetscViewer sviewer; 1249 PetscViewerFormat format; 1250 1251 PetscFunctionBegin; 1252 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1253 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1254 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1255 if (iascii) { 1256 PetscCall(PetscViewerGetFormat(viewer, &format)); 1257 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1258 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1259 PetscCall(PetscMalloc1(size, &nz)); 1260 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1261 for (i = 0; i < (PetscInt)size; i++) { 1262 nmax = PetscMax(nmax, nz[i]); 1263 nmin = PetscMin(nmin, nz[i]); 1264 navg += nz[i]; 1265 } 1266 PetscCall(PetscFree(nz)); 1267 navg = navg / size; 1268 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - 
Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1269 PetscFunctionReturn(PETSC_SUCCESS); 1270 } 1271 PetscCall(PetscViewerGetFormat(viewer, &format)); 1272 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1273 MatInfo info; 1274 PetscInt *inodes = NULL; 1275 1276 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1277 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1278 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1279 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1280 if (!inodes) { 1281 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1282 (double)info.memory)); 1283 } else { 1284 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1285 (double)info.memory)); 1286 } 1287 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1288 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1289 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1291 PetscCall(PetscViewerFlush(viewer)); 1292 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1293 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1294 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1295 PetscFunctionReturn(PETSC_SUCCESS); 1296 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1297 PetscInt inodecount, inodelimit, *inodes; 1298 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1299 if (inodes) { 1300 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1301 } else { 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1303 } 1304 PetscFunctionReturn(PETSC_SUCCESS); 1305 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } 1308 } else if (isbinary) { 1309 if (size == 1) { 1310 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1311 PetscCall(MatView(aij->A, viewer)); 1312 } else { 1313 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1314 } 1315 PetscFunctionReturn(PETSC_SUCCESS); 1316 } else if (iascii && size == 1) { 1317 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1318 PetscCall(MatView(aij->A, viewer)); 1319 PetscFunctionReturn(PETSC_SUCCESS); 1320 } else if (isdraw) { 1321 PetscDraw draw; 1322 PetscBool isnull; 1323 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1324 PetscCall(PetscDrawIsNull(draw, &isnull)); 1325 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 1328 { /* assemble the entire matrix onto first processor */ 1329 Mat A = NULL, Av; 1330 IS isrow, iscol; 1331 1332 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1333 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1334 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1335 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1336 /* The commented code uses MatCreateSubMatrices instead */ 1337 /* 1338 Mat *AA, A = NULL, Av; 1339 IS isrow,iscol; 1340 1341 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1342 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1343 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1344 if (rank == 0) { 1345 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1346 A = AA[0]; 1347 Av = AA[0]; 1348 } 1349 PetscCall(MatDestroySubMatrices(1,&AA)); 1350 */ 1351 PetscCall(ISDestroy(&iscol)); 1352 PetscCall(ISDestroy(&isrow)); 1353 /* 1354 Everyone has to call to draw the matrix since the graphics waits are 1355 synchronized across all processors that share the PetscDraw object 1356 */ 1357 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1358 if (rank == 0) { 1359 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1360 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1361 } 1362 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1363 PetscCall(MatDestroy(&A)); 1364 } 1365 PetscFunctionReturn(PETSC_SUCCESS); 1366 } 1367 1368 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1369 { 1370 PetscBool iascii, isdraw, issocket, isbinary; 1371 1372 PetscFunctionBegin; 1373 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1374 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1377 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1378 PetscFunctionReturn(PETSC_SUCCESS); 1379 } 1380 1381 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1382 { 1383 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1384 Vec bb1 = NULL; 1385 PetscBool hasop; 1386 1387 PetscFunctionBegin; 1388 if (flag == SOR_APPLY_UPPER) { 1389 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1390 PetscFunctionReturn(PETSC_SUCCESS); 1391 } 1392 1393 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1394 1395 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1396 if (flag & SOR_ZERO_INITIAL_GUESS) { 1397 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1398 its--; 1399 } 1400 1401 while (its--) { 1402 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1403 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1404 1405 /* update rhs: bb1 = bb - B*x */ 1406 PetscCall(VecScale(mat->lvec, -1.0)); 1407 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1408 1409 /* local sweep */ 1410 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1411 } 1412 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1413 if (flag & SOR_ZERO_INITIAL_GUESS) { 1414 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1415 its--; 1416 } 1417 while (its--) { 1418 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1419 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1420 1421 /* update rhs: bb1 = bb - B*x */ 1422 PetscCall(VecScale(mat->lvec, -1.0)); 1423 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1424 1425 /* local sweep */ 1426 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1427 } 1428 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1429 if (flag & SOR_ZERO_INITIAL_GUESS) { 1430 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1431 its--; 1432 } 1433 while (its--) { 1434 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1435 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1436 1437 /* update rhs: bb1 = bb - B*x */ 1438 PetscCall(VecScale(mat->lvec, -1.0)); 1439 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1440 1441 /* local sweep */ 1442 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1443 } 1444 } else if (flag & SOR_EISENSTAT) { 1445 Vec xx1; 1446 1447 PetscCall(VecDuplicate(bb, &xx1)); 1448 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1449 1450 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1451 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1452 if (!mat->diag) { 1453 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1454 PetscCall(MatGetDiagonal(matin, mat->diag)); 1455 } 1456 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1457 if (hasop) { 1458 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1459 } else { 1460 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1461 } 1462 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1463 1464 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1465 1466 /* local sweep */ 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1468 PetscCall(VecAXPY(xx, 1.0, xx1)); 1469 PetscCall(VecDestroy(&xx1)); 1470 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1471 1472 PetscCall(VecDestroy(&bb1)); 1473 1474 matin->factorerrortype = mat->A->factorerrortype; 1475 PetscFunctionReturn(PETSC_SUCCESS); 1476 } 1477 1478 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1479 { 1480 Mat aA, aB, Aperm; 1481 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1482 PetscScalar *aa, *ba; 1483 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1484 PetscSF rowsf, sf; 1485 IS parcolp = NULL; 1486 PetscBool done; 1487 1488 PetscFunctionBegin; 1489 PetscCall(MatGetLocalSize(A, &m, &n)); 1490 PetscCall(ISGetIndices(rowp, &rwant)); 1491 PetscCall(ISGetIndices(colp, &cwant)); 1492 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1493 1494 /* Invert row permutation to find out where my rows should go */ 1495 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1496 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1497 PetscCall(PetscSFSetFromOptions(rowsf)); 1498 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1499 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1500 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1501 1502 /* Invert column permutation to find out where my columns should go */ 1503 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1504 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1505 PetscCall(PetscSFSetFromOptions(sf)); 1506 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1507 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1508 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1509 PetscCall(PetscSFDestroy(&sf)); 1510 1511 PetscCall(ISRestoreIndices(rowp, &rwant)); 1512 PetscCall(ISRestoreIndices(colp, &cwant)); 1513 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1514 1515 /* Find out where my gcols should go */ 1516 PetscCall(MatGetSize(aB, NULL, &ng)); 1517 PetscCall(PetscMalloc1(ng, &gcdest)); 1518 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1519 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1520 PetscCall(PetscSFSetFromOptions(sf)); 1521 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1522 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1523 PetscCall(PetscSFDestroy(&sf)); 1524 1525 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1526 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1527 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1528 for (i = 0; i < m; i++) { 1529 PetscInt row = rdest[i]; 1530 PetscMPIInt rowner; 1531 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1532 for (j = ai[i]; j < ai[i + 1]; j++) { 1533 PetscInt col = cdest[aj[j]]; 1534 PetscMPIInt cowner; 1535 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1536 if (rowner == cowner) dnnz[i]++; 1537 else onnz[i]++; 1538 } 1539 for (j = bi[i]; j < bi[i + 1]; j++) { 1540 PetscInt col = gcdest[bj[j]]; 1541 PetscMPIInt cowner; 1542 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1543 if (rowner == cowner) dnnz[i]++; 1544 else onnz[i]++; 1545 } 1546 } 1547 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1548 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1551 PetscCall(PetscSFDestroy(&rowsf)); 1552 1553 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1554 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1555 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1556 for (i = 0; i < m; i++) { 1557 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1558 PetscInt j0, rowlen; 1559 rowlen = ai[i + 1] - ai[i]; 1560 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1561 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1562 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1563 } 1564 rowlen = bi[i + 1] - bi[i]; 1565 for (j0 = j = 0; j < rowlen; j0 = j) { 1566 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1567 
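/* insert this batch of permuted off-diagonal entries: bcols[] holds their new global column indices, and the values come from ba[] at the matching offsets of row i */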
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1568 } 1569 } 1570 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1571 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1572 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1573 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1574 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1575 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1576 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1577 PetscCall(PetscFree3(work, rdest, cdest)); 1578 PetscCall(PetscFree(gcdest)); 1579 if (parcolp) PetscCall(ISDestroy(&colp)); 1580 *B = Aperm; 1581 PetscFunctionReturn(PETSC_SUCCESS); 1582 } 1583 1584 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1585 { 1586 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1587 1588 PetscFunctionBegin; 1589 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1590 if (ghosts) *ghosts = aij->garray; 1591 PetscFunctionReturn(PETSC_SUCCESS); 1592 } 1593 1594 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1595 { 1596 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1597 Mat A = mat->A, B = mat->B; 1598 PetscLogDouble isend[5], irecv[5]; 1599 1600 PetscFunctionBegin; 1601 info->block_size = 1.0; 1602 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1603 1604 isend[0] = info->nz_used; 1605 isend[1] = info->nz_allocated; 1606 isend[2] = info->nz_unneeded; 1607 isend[3] = info->memory; 1608 isend[4] = info->mallocs; 1609 1610 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1611 1612 isend[0] += info->nz_used; 1613 isend[1] += info->nz_allocated; 1614 isend[2] += info->nz_unneeded; 1615 isend[3] += info->memory; 1616 isend[4] += info->mallocs; 1617 if (flag == MAT_LOCAL) { 1618 info->nz_used = isend[0]; 1619 info->nz_allocated = isend[1]; 1620 info->nz_unneeded = isend[2]; 1621 info->memory = isend[3]; 1622 info->mallocs = isend[4]; 1623 } else if (flag == MAT_GLOBAL_MAX) { 1624 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1625 1626 info->nz_used = irecv[0]; 1627 info->nz_allocated = irecv[1]; 1628 info->nz_unneeded = irecv[2]; 1629 info->memory = irecv[3]; 1630 info->mallocs = irecv[4]; 1631 } else if (flag == MAT_GLOBAL_SUM) { 1632 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1633 1634 info->nz_used = irecv[0]; 1635 info->nz_allocated = irecv[1]; 1636 info->nz_unneeded = irecv[2]; 1637 info->memory = irecv[3]; 1638 info->mallocs = irecv[4]; 1639 } 1640 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1641 info->fill_ratio_needed = 0; 1642 info->factor_mallocs = 0; 1643 PetscFunctionReturn(PETSC_SUCCESS); 1644 } 1645 1646 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1647 { 1648 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1649 1650 PetscFunctionBegin; 1651 switch (op) { 1652 case MAT_NEW_NONZERO_LOCATIONS: 1653 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1654 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1655 case MAT_KEEP_NONZERO_PATTERN: 1656 case MAT_NEW_NONZERO_LOCATION_ERR: 1657 case MAT_USE_INODES: 1658 case MAT_IGNORE_ZERO_ENTRIES: 1659 case MAT_FORM_EXPLICIT_TRANSPOSE: 1660 MatCheckPreallocated(A, 1); 1661 PetscCall(MatSetOption(a->A, op, flg)); 1662 PetscCall(MatSetOption(a->B, op, flg)); 1663 break; 1664 case MAT_ROW_ORIENTED: 1665 MatCheckPreallocated(A, 1); 1666 a->roworiented = flg; 1667 1668 
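/* propagate the row/column-oriented insertion mode to both the diagonal (A) and off-diagonal (B) sequential blocks */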
PetscCall(MatSetOption(a->A, op, flg)); 1669 PetscCall(MatSetOption(a->B, op, flg)); 1670 break; 1671 case MAT_FORCE_DIAGONAL_ENTRIES: 1672 case MAT_SORTED_FULL: 1673 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1674 break; 1675 case MAT_IGNORE_OFF_PROC_ENTRIES: 1676 a->donotstash = flg; 1677 break; 1678 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1679 case MAT_SPD: 1680 case MAT_SYMMETRIC: 1681 case MAT_STRUCTURALLY_SYMMETRIC: 1682 case MAT_HERMITIAN: 1683 case MAT_SYMMETRY_ETERNAL: 1684 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1685 case MAT_SPD_ETERNAL: 1686 /* if the diagonal matrix is square it inherits some of the properties above */ 1687 break; 1688 case MAT_SUBMAT_SINGLEIS: 1689 A->submat_singleis = flg; 1690 break; 1691 case MAT_STRUCTURE_ONLY: 1692 /* The option is handled directly by MatSetOption() */ 1693 break; 1694 default: 1695 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1696 } 1697 PetscFunctionReturn(PETSC_SUCCESS); 1698 } 1699 1700 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1701 { 1702 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1703 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1704 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1705 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1706 PetscInt *cmap, *idx_p; 1707 1708 PetscFunctionBegin; 1709 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1710 mat->getrowactive = PETSC_TRUE; 1711 1712 if (!mat->rowvalues && (idx || v)) { 1713 /* 1714 allocate enough space to hold information from the longest row. 1715 */ 1716 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1717 PetscInt max = 1, tmp; 1718 for (i = 0; i < matin->rmap->n; i++) { 1719 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1720 if (max < tmp) max = tmp; 1721 } 1722 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1723 } 1724 1725 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1726 lrow = row - rstart; 1727 1728 pvA = &vworkA; 1729 pcA = &cworkA; 1730 pvB = &vworkB; 1731 pcB = &cworkB; 1732 if (!v) { 1733 pvA = NULL; 1734 pvB = NULL; 1735 } 1736 if (!idx) { 1737 pcA = NULL; 1738 if (!v) pcB = NULL; 1739 } 1740 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1741 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1742 nztot = nzA + nzB; 1743 1744 cmap = mat->garray; 1745 if (v || idx) { 1746 if (nztot) { 1747 /* Sort by increasing column numbers, assuming A and B already sorted */ 1748 PetscInt imark = -1; 1749 if (v) { 1750 *v = v_p = mat->rowvalues; 1751 for (i = 0; i < nzB; i++) { 1752 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1753 else break; 1754 } 1755 imark = i; 1756 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1757 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1758 } 1759 if (idx) { 1760 *idx = idx_p = mat->rowindices; 1761 if (imark > -1) { 1762 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1763 } else { 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1766 else break; 1767 } 1768 imark = i; 1769 } 1770 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1771 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1772 } 1773 } else { 1774 if 
(idx) *idx = NULL; 1775 if (v) *v = NULL; 1776 } 1777 } 1778 *nz = nztot; 1779 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1780 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1781 PetscFunctionReturn(PETSC_SUCCESS); 1782 } 1783 1784 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1785 { 1786 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1787 1788 PetscFunctionBegin; 1789 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1790 aij->getrowactive = PETSC_FALSE; 1791 PetscFunctionReturn(PETSC_SUCCESS); 1792 } 1793 1794 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1795 { 1796 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1797 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1798 PetscInt i, j, cstart = mat->cmap->rstart; 1799 PetscReal sum = 0.0; 1800 const MatScalar *v, *amata, *bmata; 1801 1802 PetscFunctionBegin; 1803 if (aij->size == 1) { 1804 PetscCall(MatNorm(aij->A, type, norm)); 1805 } else { 1806 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1807 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1808 if (type == NORM_FROBENIUS) { 1809 v = amata; 1810 for (i = 0; i < amat->nz; i++) { 1811 sum += PetscRealPart(PetscConj(*v) * (*v)); 1812 v++; 1813 } 1814 v = bmata; 1815 for (i = 0; i < bmat->nz; i++) { 1816 sum += PetscRealPart(PetscConj(*v) * (*v)); 1817 v++; 1818 } 1819 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1820 *norm = PetscSqrtReal(*norm); 1821 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1822 } else if (type == NORM_1) { /* max column norm */ 1823 PetscReal *tmp, *tmp2; 1824 PetscInt *jj, *garray = aij->garray; 1825 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1826 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1827 *norm = 0.0; 1828 v = amata; 1829 jj = amat->j; 1830 for (j = 0; j < amat->nz; j++) { 1831 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1832 v++; 1833 } 1834 v = bmata; 1835 jj = bmat->j; 1836 for (j = 0; j < bmat->nz; j++) { 1837 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1838 v++; 1839 } 1840 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1841 for (j = 0; j < mat->cmap->N; j++) { 1842 if (tmp2[j] > *norm) *norm = tmp2[j]; 1843 } 1844 PetscCall(PetscFree(tmp)); 1845 PetscCall(PetscFree(tmp2)); 1846 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1847 } else if (type == NORM_INFINITY) { /* max row norm */ 1848 PetscReal ntemp = 0.0; 1849 for (j = 0; j < aij->A->rmap->n; j++) { 1850 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1851 sum = 0.0; 1852 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1853 sum += PetscAbsScalar(*v); 1854 v++; 1855 } 1856 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1857 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1858 sum += PetscAbsScalar(*v); 1859 v++; 1860 } 1861 if (sum > ntemp) ntemp = sum; 1862 } 1863 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1864 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1865 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1866 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1867 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1868 } 1869 PetscFunctionReturn(PETSC_SUCCESS); 1870 } 1871 
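/*
   A minimal usage sketch (not part of the library) of the norms implemented by MatNorm_MPIAIJ() above,
   assuming C is an already assembled MATMPIAIJ matrix; only the MatNorm() calls and the NormType values
   are the point being illustrated:

     PetscReal fnorm, onenorm, infnorm;
     PetscCall(MatNorm(C, NORM_FROBENIUS, &fnorm));  // sqrt of the global sum of |a_ij|^2 over both local blocks
     PetscCall(MatNorm(C, NORM_1, &onenorm));        // largest column sum of |a_ij|, reduced over all ranks
     PetscCall(MatNorm(C, NORM_INFINITY, &infnorm)); // largest row sum of |a_ij|, reduced over all ranks

   NORM_2 is not supported by this implementation, as the SETERRQ() above indicates.
*/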
1872 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1873 { 1874 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1875 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1876 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1877 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1878 Mat B, A_diag, *B_diag; 1879 const MatScalar *pbv, *bv; 1880 1881 PetscFunctionBegin; 1882 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1883 ma = A->rmap->n; 1884 na = A->cmap->n; 1885 mb = a->B->rmap->n; 1886 nb = a->B->cmap->n; 1887 ai = Aloc->i; 1888 aj = Aloc->j; 1889 bi = Bloc->i; 1890 bj = Bloc->j; 1891 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1892 PetscInt *d_nnz, *g_nnz, *o_nnz; 1893 PetscSFNode *oloc; 1894 PETSC_UNUSED PetscSF sf; 1895 1896 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1897 /* compute d_nnz for preallocation */ 1898 PetscCall(PetscArrayzero(d_nnz, na)); 1899 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1900 /* compute local off-diagonal contributions */ 1901 PetscCall(PetscArrayzero(g_nnz, nb)); 1902 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1903 /* map those to global */ 1904 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1905 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1906 PetscCall(PetscSFSetFromOptions(sf)); 1907 PetscCall(PetscArrayzero(o_nnz, na)); 1908 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1909 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1910 PetscCall(PetscSFDestroy(&sf)); 1911 1912 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1913 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1914 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1915 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1916 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1917 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1918 } else { 1919 B = *matout; 1920 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1921 } 1922 1923 b = (Mat_MPIAIJ *)B->data; 1924 A_diag = a->A; 1925 B_diag = &b->A; 1926 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1927 A_diag_ncol = A_diag->cmap->N; 1928 B_diag_ilen = sub_B_diag->ilen; 1929 B_diag_i = sub_B_diag->i; 1930 1931 /* Set ilen for diagonal of B */ 1932 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1933 1934 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1935 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1936 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1937 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1938 1939 /* copy over the B part */ 1940 PetscCall(PetscMalloc1(bi[mb], &cols)); 1941 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1942 pbv = bv; 1943 row = A->rmap->rstart; 1944 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1945 cols_tmp = cols; 1946 for (i = 0; i < mb; i++) { 1947 ncol = bi[i + 1] - bi[i]; 1948 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1949 row++; 1950 if (pbv) pbv += ncol; 1951 if (cols_tmp) cols_tmp += ncol; 1952 } 1953 PetscCall(PetscFree(cols)); 1954 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1955 1956 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1957 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1958 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1959 *matout = B; 1960 } else { 1961 PetscCall(MatHeaderMerge(A, &B)); 1962 } 1963 PetscFunctionReturn(PETSC_SUCCESS); 1964 } 1965 1966 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1967 { 1968 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1969 Mat a = aij->A, b = aij->B; 1970 PetscInt s1, s2, s3; 1971 1972 PetscFunctionBegin; 1973 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1974 if (rr) { 1975 PetscCall(VecGetLocalSize(rr, &s1)); 1976 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1977 /* Overlap communication with computation. */ 1978 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1979 } 1980 if (ll) { 1981 PetscCall(VecGetLocalSize(ll, &s1)); 1982 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1983 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1984 } 1985 /* scale the diagonal block */ 1986 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1987 1988 if (rr) { 1989 /* Do a scatter end and then right scale the off-diagonal block */ 1990 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1991 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1992 } 1993 PetscFunctionReturn(PETSC_SUCCESS); 1994 } 1995 1996 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1997 { 1998 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1999 2000 PetscFunctionBegin; 2001 PetscCall(MatSetUnfactored(a->A)); 2002 PetscFunctionReturn(PETSC_SUCCESS); 2003 } 2004 2005 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2006 { 2007 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2008 Mat a, b, c, d; 2009 PetscBool flg; 2010 2011 PetscFunctionBegin; 2012 a = matA->A; 2013 b = matA->B; 2014 c = matB->A; 2015 d = matB->B; 2016 2017 PetscCall(MatEqual(a, c, &flg)); 2018 if (flg) PetscCall(MatEqual(b, d, &flg)); 2019 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2020 PetscFunctionReturn(PETSC_SUCCESS); 2021 } 2022 2023 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2024 { 2025 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2026 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2027 2028 PetscFunctionBegin; 2029 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2030 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2031 /* because of the column compression in the off-processor part of the matrix a->B, 2032 the number of columns in a->B and b->B may be different, hence we cannot call 2033 the MatCopy() directly on the two parts. If need be, we can provide a more 2034 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2035 then copying the submatrices */ 2036 PetscCall(MatCopy_Basic(A, B, str)); 2037 } else { 2038 PetscCall(MatCopy(a->A, b->A, str)); 2039 PetscCall(MatCopy(a->B, b->B, str)); 2040 } 2041 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2042 PetscFunctionReturn(PETSC_SUCCESS); 2043 } 2044 2045 /* 2046 Computes the number of nonzeros per row needed for preallocation when X and Y 2047 have different nonzero structure. 2048 */ 2049 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2050 { 2051 PetscInt i, j, k, nzx, nzy; 2052 2053 PetscFunctionBegin; 2054 /* Set the number of nonzeros in the new matrix */ 2055 for (i = 0; i < m; i++) { 2056 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2057 nzx = xi[i + 1] - xi[i]; 2058 nzy = yi[i + 1] - yi[i]; 2059 nnz[i] = 0; 2060 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2061 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2062 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2063 nnz[i]++; 2064 } 2065 for (; k < nzy; k++) nnz[i]++; 2066 } 2067 PetscFunctionReturn(PETSC_SUCCESS); 2068 } 2069 2070 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2071 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2072 { 2073 PetscInt m = Y->rmap->N; 2074 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2075 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2076 2077 PetscFunctionBegin; 2078 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2079 PetscFunctionReturn(PETSC_SUCCESS); 2080 } 2081 2082 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2083 { 2084 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2085 2086 PetscFunctionBegin; 2087 if (str == SAME_NONZERO_PATTERN) { 2088 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2089 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2090 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2091 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2092 } else { 2093 Mat B; 2094 PetscInt *nnz_d, *nnz_o; 2095 2096 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2097 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2098 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2099 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2100 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2101 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2102 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2103 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2104 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2105 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2106 PetscCall(MatHeaderMerge(Y, &B)); 2107 PetscCall(PetscFree(nnz_d)); 
2108 PetscCall(PetscFree(nnz_o)); 2109 } 2110 PetscFunctionReturn(PETSC_SUCCESS); 2111 } 2112 2113 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2114 2115 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2116 { 2117 PetscFunctionBegin; 2118 if (PetscDefined(USE_COMPLEX)) { 2119 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2120 2121 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2122 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2123 } 2124 PetscFunctionReturn(PETSC_SUCCESS); 2125 } 2126 2127 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2128 { 2129 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2130 2131 PetscFunctionBegin; 2132 PetscCall(MatRealPart(a->A)); 2133 PetscCall(MatRealPart(a->B)); 2134 PetscFunctionReturn(PETSC_SUCCESS); 2135 } 2136 2137 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2138 { 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2140 2141 PetscFunctionBegin; 2142 PetscCall(MatImaginaryPart(a->A)); 2143 PetscCall(MatImaginaryPart(a->B)); 2144 PetscFunctionReturn(PETSC_SUCCESS); 2145 } 2146 2147 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2148 { 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2150 PetscInt i, *idxb = NULL, m = A->rmap->n; 2151 PetscScalar *va, *vv; 2152 Vec vB, vA; 2153 const PetscScalar *vb; 2154 2155 PetscFunctionBegin; 2156 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2157 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2158 2159 PetscCall(VecGetArrayWrite(vA, &va)); 2160 if (idx) { 2161 for (i = 0; i < m; i++) { 2162 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2163 } 2164 } 2165 2166 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2167 PetscCall(PetscMalloc1(m, &idxb)); 2168 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2169 2170 PetscCall(VecGetArrayWrite(v, &vv)); 2171 PetscCall(VecGetArrayRead(vB, &vb)); 2172 for (i = 0; i < m; i++) { 2173 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2174 vv[i] = vb[i]; 2175 if (idx) idx[i] = a->garray[idxb[i]]; 2176 } else { 2177 vv[i] = va[i]; 2178 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2179 } 2180 } 2181 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2182 PetscCall(VecRestoreArrayWrite(vA, &va)); 2183 PetscCall(VecRestoreArrayRead(vB, &vb)); 2184 PetscCall(PetscFree(idxb)); 2185 PetscCall(VecDestroy(&vA)); 2186 PetscCall(VecDestroy(&vB)); 2187 PetscFunctionReturn(PETSC_SUCCESS); 2188 } 2189 2190 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2191 { 2192 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2193 Vec vB, vA; 2194 2195 PetscFunctionBegin; 2196 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2197 PetscCall(MatGetRowSumAbs(a->A, vA)); 2198 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2199 PetscCall(MatGetRowSumAbs(a->B, vB)); 2200 PetscCall(VecAXPY(vA, 1.0, vB)); 2201 PetscCall(VecDestroy(&vB)); 2202 PetscCall(VecCopy(vA, v)); 2203 PetscCall(VecDestroy(&vA)); 2204 PetscFunctionReturn(PETSC_SUCCESS); 2205 } 2206 2207 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2208 { 2209 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2210 PetscInt m = A->rmap->n, n = A->cmap->n; 2211 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2212 PetscInt *cmap = mat->garray; 2213 PetscInt *diagIdx, *offdiagIdx; 2214 Vec diagV, offdiagV; 2215 PetscScalar *a, *diagA, *offdiagA; 2216 const PetscScalar *ba, *bav; 2217 PetscInt r, j, col, ncols, *bi, *bj; 2218 Mat B = mat->B; 2219 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2220 2221 PetscFunctionBegin; 2222 /* When a process holds entire A and other 
processes have no entry */ 2223 if (A->cmap->N == n) { 2224 PetscCall(VecGetArrayWrite(v, &diagA)); 2225 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2226 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2227 PetscCall(VecDestroy(&diagV)); 2228 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2229 PetscFunctionReturn(PETSC_SUCCESS); 2230 } else if (n == 0) { 2231 if (m) { 2232 PetscCall(VecGetArrayWrite(v, &a)); 2233 for (r = 0; r < m; r++) { 2234 a[r] = 0.0; 2235 if (idx) idx[r] = -1; 2236 } 2237 PetscCall(VecRestoreArrayWrite(v, &a)); 2238 } 2239 PetscFunctionReturn(PETSC_SUCCESS); 2240 } 2241 2242 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2243 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2244 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2245 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2246 2247 /* Get offdiagIdx[] for implicit 0.0 */ 2248 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2249 ba = bav; 2250 bi = b->i; 2251 bj = b->j; 2252 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2253 for (r = 0; r < m; r++) { 2254 ncols = bi[r + 1] - bi[r]; 2255 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2256 offdiagA[r] = *ba; 2257 offdiagIdx[r] = cmap[0]; 2258 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2259 offdiagA[r] = 0.0; 2260 2261 /* Find first hole in the cmap */ 2262 for (j = 0; j < ncols; j++) { 2263 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2264 if (col > j && j < cstart) { 2265 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2266 break; 2267 } else if (col > j + n && j >= cstart) { 2268 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2269 break; 2270 } 2271 } 2272 if (j == ncols && ncols < A->cmap->N - n) { 2273 /* a hole is outside compressed Bcols */ 2274 if (ncols == 0) { 2275 if (cstart) { 2276 offdiagIdx[r] = 0; 2277 } else offdiagIdx[r] = cend; 2278 } else { /* ncols > 0 */ 2279 offdiagIdx[r] = cmap[ncols - 1] + 1; 2280 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2281 } 2282 } 2283 } 2284 2285 for (j = 0; j < ncols; j++) { 2286 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2287 offdiagA[r] = *ba; 2288 offdiagIdx[r] = cmap[*bj]; 2289 } 2290 ba++; 2291 bj++; 2292 } 2293 } 2294 2295 PetscCall(VecGetArrayWrite(v, &a)); 2296 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2297 for (r = 0; r < m; ++r) { 2298 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2299 a[r] = diagA[r]; 2300 if (idx) idx[r] = cstart + diagIdx[r]; 2301 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2302 a[r] = diagA[r]; 2303 if (idx) { 2304 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2305 idx[r] = cstart + diagIdx[r]; 2306 } else idx[r] = offdiagIdx[r]; 2307 } 2308 } else { 2309 a[r] = offdiagA[r]; 2310 if (idx) idx[r] = offdiagIdx[r]; 2311 } 2312 } 2313 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2314 PetscCall(VecRestoreArrayWrite(v, &a)); 2315 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2316 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2317 PetscCall(VecDestroy(&diagV)); 2318 PetscCall(VecDestroy(&offdiagV)); 2319 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2320 PetscFunctionReturn(PETSC_SUCCESS); 2321 } 2322 2323 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2324 { 2325 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2326 PetscInt m = A->rmap->n, n = A->cmap->n; 2327 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 
2328 PetscInt *cmap = mat->garray; 2329 PetscInt *diagIdx, *offdiagIdx; 2330 Vec diagV, offdiagV; 2331 PetscScalar *a, *diagA, *offdiagA; 2332 const PetscScalar *ba, *bav; 2333 PetscInt r, j, col, ncols, *bi, *bj; 2334 Mat B = mat->B; 2335 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2336 2337 PetscFunctionBegin; 2338 /* When a process holds entire A and other processes have no entry */ 2339 if (A->cmap->N == n) { 2340 PetscCall(VecGetArrayWrite(v, &diagA)); 2341 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2342 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2343 PetscCall(VecDestroy(&diagV)); 2344 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2345 PetscFunctionReturn(PETSC_SUCCESS); 2346 } else if (n == 0) { 2347 if (m) { 2348 PetscCall(VecGetArrayWrite(v, &a)); 2349 for (r = 0; r < m; r++) { 2350 a[r] = PETSC_MAX_REAL; 2351 if (idx) idx[r] = -1; 2352 } 2353 PetscCall(VecRestoreArrayWrite(v, &a)); 2354 } 2355 PetscFunctionReturn(PETSC_SUCCESS); 2356 } 2357 2358 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2359 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2360 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2361 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2362 2363 /* Get offdiagIdx[] for implicit 0.0 */ 2364 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2365 ba = bav; 2366 bi = b->i; 2367 bj = b->j; 2368 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2369 for (r = 0; r < m; r++) { 2370 ncols = bi[r + 1] - bi[r]; 2371 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2372 offdiagA[r] = *ba; 2373 offdiagIdx[r] = cmap[0]; 2374 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2375 offdiagA[r] = 0.0; 2376 2377 /* Find first hole in the cmap */ 2378 for (j = 0; j < ncols; j++) { 2379 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2380 if (col > j && j < cstart) { 2381 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2382 break; 2383 } else if (col > j + n && j >= cstart) { 2384 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2385 break; 2386 } 2387 } 2388 if (j == ncols && ncols < A->cmap->N - n) { 2389 /* a hole is outside compressed Bcols */ 2390 if (ncols == 0) { 2391 if (cstart) { 2392 offdiagIdx[r] = 0; 2393 } else offdiagIdx[r] = cend; 2394 } else { /* ncols > 0 */ 2395 offdiagIdx[r] = cmap[ncols - 1] + 1; 2396 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2397 } 2398 } 2399 } 2400 2401 for (j = 0; j < ncols; j++) { 2402 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2403 offdiagA[r] = *ba; 2404 offdiagIdx[r] = cmap[*bj]; 2405 } 2406 ba++; 2407 bj++; 2408 } 2409 } 2410 2411 PetscCall(VecGetArrayWrite(v, &a)); 2412 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2413 for (r = 0; r < m; ++r) { 2414 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2415 a[r] = diagA[r]; 2416 if (idx) idx[r] = cstart + diagIdx[r]; 2417 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2418 a[r] = diagA[r]; 2419 if (idx) { 2420 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2421 idx[r] = cstart + diagIdx[r]; 2422 } else idx[r] = offdiagIdx[r]; 2423 } 2424 } else { 2425 a[r] = offdiagA[r]; 2426 if (idx) idx[r] = offdiagIdx[r]; 2427 } 2428 } 2429 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2430 PetscCall(VecRestoreArrayWrite(v, &a)); 2431 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2432 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2433 PetscCall(VecDestroy(&diagV)); 2434 
PetscCall(VecDestroy(&offdiagV)); 2435 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2436 PetscFunctionReturn(PETSC_SUCCESS); 2437 } 2438 2439 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2440 { 2441 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2442 PetscInt m = A->rmap->n, n = A->cmap->n; 2443 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2444 PetscInt *cmap = mat->garray; 2445 PetscInt *diagIdx, *offdiagIdx; 2446 Vec diagV, offdiagV; 2447 PetscScalar *a, *diagA, *offdiagA; 2448 const PetscScalar *ba, *bav; 2449 PetscInt r, j, col, ncols, *bi, *bj; 2450 Mat B = mat->B; 2451 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2452 2453 PetscFunctionBegin; 2454 /* When a process holds entire A and other processes have no entry */ 2455 if (A->cmap->N == n) { 2456 PetscCall(VecGetArrayWrite(v, &diagA)); 2457 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2458 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2459 PetscCall(VecDestroy(&diagV)); 2460 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2461 PetscFunctionReturn(PETSC_SUCCESS); 2462 } else if (n == 0) { 2463 if (m) { 2464 PetscCall(VecGetArrayWrite(v, &a)); 2465 for (r = 0; r < m; r++) { 2466 a[r] = PETSC_MIN_REAL; 2467 if (idx) idx[r] = -1; 2468 } 2469 PetscCall(VecRestoreArrayWrite(v, &a)); 2470 } 2471 PetscFunctionReturn(PETSC_SUCCESS); 2472 } 2473 2474 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2475 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2476 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2477 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2478 2479 /* Get offdiagIdx[] for implicit 0.0 */ 2480 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2481 ba = bav; 2482 bi = b->i; 2483 bj = b->j; 2484 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2485 for (r = 0; r < m; r++) { 2486 ncols = bi[r + 1] - bi[r]; 2487 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2488 offdiagA[r] = *ba; 2489 offdiagIdx[r] = cmap[0]; 2490 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2491 offdiagA[r] = 0.0; 2492 2493 /* Find first hole in the cmap */ 2494 for (j = 0; j < ncols; j++) { 2495 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2496 if (col > j && j < cstart) { 2497 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2498 break; 2499 } else if (col > j + n && j >= cstart) { 2500 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2501 break; 2502 } 2503 } 2504 if (j == ncols && ncols < A->cmap->N - n) { 2505 /* a hole is outside compressed Bcols */ 2506 if (ncols == 0) { 2507 if (cstart) { 2508 offdiagIdx[r] = 0; 2509 } else offdiagIdx[r] = cend; 2510 } else { /* ncols > 0 */ 2511 offdiagIdx[r] = cmap[ncols - 1] + 1; 2512 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2513 } 2514 } 2515 } 2516 2517 for (j = 0; j < ncols; j++) { 2518 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2519 offdiagA[r] = *ba; 2520 offdiagIdx[r] = cmap[*bj]; 2521 } 2522 ba++; 2523 bj++; 2524 } 2525 } 2526 2527 PetscCall(VecGetArrayWrite(v, &a)); 2528 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2529 for (r = 0; r < m; ++r) { 2530 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2531 a[r] = diagA[r]; 2532 if (idx) idx[r] = cstart + diagIdx[r]; 2533 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2534 a[r] = diagA[r]; 2535 if (idx) { 2536 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2537 idx[r] = cstart + diagIdx[r]; 2538 } else idx[r] = offdiagIdx[r]; 2539 } 2540 } 
else { 2541 a[r] = offdiagA[r]; 2542 if (idx) idx[r] = offdiagIdx[r]; 2543 } 2544 } 2545 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2546 PetscCall(VecRestoreArrayWrite(v, &a)); 2547 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2548 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2549 PetscCall(VecDestroy(&diagV)); 2550 PetscCall(VecDestroy(&offdiagV)); 2551 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2552 PetscFunctionReturn(PETSC_SUCCESS); 2553 } 2554 2555 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2556 { 2557 Mat *dummy; 2558 2559 PetscFunctionBegin; 2560 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2561 *newmat = *dummy; 2562 PetscCall(PetscFree(dummy)); 2563 PetscFunctionReturn(PETSC_SUCCESS); 2564 } 2565 2566 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2567 { 2568 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2569 2570 PetscFunctionBegin; 2571 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2572 A->factorerrortype = a->A->factorerrortype; 2573 PetscFunctionReturn(PETSC_SUCCESS); 2574 } 2575 2576 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2577 { 2578 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2579 2580 PetscFunctionBegin; 2581 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2582 PetscCall(MatSetRandom(aij->A, rctx)); 2583 if (x->assembled) { 2584 PetscCall(MatSetRandom(aij->B, rctx)); 2585 } else { 2586 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2587 } 2588 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2589 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2590 PetscFunctionReturn(PETSC_SUCCESS); 2591 } 2592 2593 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2594 { 2595 PetscFunctionBegin; 2596 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2597 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2598 PetscFunctionReturn(PETSC_SUCCESS); 2599 } 2600 2601 /*@ 2602 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2603 2604 Not Collective 2605 2606 Input Parameter: 2607 . A - the matrix 2608 2609 Output Parameter: 2610 . 
nz - the number of nonzeros 2611 2612 Level: advanced 2613 2614 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2615 @*/ 2616 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2617 { 2618 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2619 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2620 PetscBool isaij; 2621 2622 PetscFunctionBegin; 2623 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2624 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2625 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2626 PetscFunctionReturn(PETSC_SUCCESS); 2627 } 2628 2629 /*@ 2630 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2631 2632 Collective 2633 2634 Input Parameters: 2635 + A - the matrix 2636 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2637 2638 Level: advanced 2639 2640 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2641 @*/ 2642 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2643 { 2644 PetscFunctionBegin; 2645 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2646 PetscFunctionReturn(PETSC_SUCCESS); 2647 } 2648 2649 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2650 { 2651 PetscBool sc = PETSC_FALSE, flg; 2652 2653 PetscFunctionBegin; 2654 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2655 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2656 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2657 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2658 PetscOptionsHeadEnd(); 2659 PetscFunctionReturn(PETSC_SUCCESS); 2660 } 2661 2662 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2663 { 2664 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2665 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2666 2667 PetscFunctionBegin; 2668 if (!Y->preallocated) { 2669 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2670 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2671 PetscInt nonew = aij->nonew; 2672 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2673 aij->nonew = nonew; 2674 } 2675 PetscCall(MatShift_Basic(Y, a)); 2676 PetscFunctionReturn(PETSC_SUCCESS); 2677 } 2678 2679 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2680 { 2681 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2682 2683 PetscFunctionBegin; 2684 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2685 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2686 if (d) { 2687 PetscInt rstart; 2688 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2689 *d += rstart; 2690 } 2691 PetscFunctionReturn(PETSC_SUCCESS); 2692 } 2693 2694 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2695 { 2696 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2697 2698 PetscFunctionBegin; 2699 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2700 PetscFunctionReturn(PETSC_SUCCESS); 2701 } 2702 2703 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2704 { 2705 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2706 2707 PetscFunctionBegin; 2708 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2709 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2710 PetscFunctionReturn(PETSC_SUCCESS); 2711 } 2712 2713 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2714 MatGetRow_MPIAIJ, 2715 MatRestoreRow_MPIAIJ, 2716 MatMult_MPIAIJ, 2717 /* 4*/ MatMultAdd_MPIAIJ, 2718 MatMultTranspose_MPIAIJ, 2719 MatMultTransposeAdd_MPIAIJ, 2720 NULL, 2721 NULL, 2722 NULL, 2723 /*10*/ NULL, 2724 NULL, 2725 NULL, 2726 MatSOR_MPIAIJ, 2727 MatTranspose_MPIAIJ, 2728 /*15*/ MatGetInfo_MPIAIJ, 2729 MatEqual_MPIAIJ, 2730 MatGetDiagonal_MPIAIJ, 2731 MatDiagonalScale_MPIAIJ, 2732 MatNorm_MPIAIJ, 2733 /*20*/ MatAssemblyBegin_MPIAIJ, 2734 MatAssemblyEnd_MPIAIJ, 2735 MatSetOption_MPIAIJ, 2736 MatZeroEntries_MPIAIJ, 2737 /*24*/ MatZeroRows_MPIAIJ, 2738 NULL, 2739 NULL, 2740 NULL, 2741 NULL, 2742 /*29*/ MatSetUp_MPI_Hash, 2743 NULL, 2744 NULL, 2745 MatGetDiagonalBlock_MPIAIJ, 2746 NULL, 2747 /*34*/ MatDuplicate_MPIAIJ, 2748 NULL, 2749 NULL, 2750 NULL, 2751 NULL, 2752 /*39*/ MatAXPY_MPIAIJ, 2753 MatCreateSubMatrices_MPIAIJ, 2754 MatIncreaseOverlap_MPIAIJ, 2755 MatGetValues_MPIAIJ, 2756 MatCopy_MPIAIJ, 2757 /*44*/ MatGetRowMax_MPIAIJ, 2758 MatScale_MPIAIJ, 2759 MatShift_MPIAIJ, 2760 MatDiagonalSet_MPIAIJ, 2761 MatZeroRowsColumns_MPIAIJ, 2762 /*49*/ MatSetRandom_MPIAIJ, 2763 MatGetRowIJ_MPIAIJ, 2764 MatRestoreRowIJ_MPIAIJ, 2765 NULL, 2766 NULL, 2767 /*54*/ MatFDColoringCreate_MPIXAIJ, 2768 NULL, 2769 MatSetUnfactored_MPIAIJ, 2770 MatPermute_MPIAIJ, 2771 NULL, 2772 /*59*/ MatCreateSubMatrix_MPIAIJ, 2773 MatDestroy_MPIAIJ, 2774 MatView_MPIAIJ, 2775 NULL, 2776 NULL, 2777 /*64*/ NULL, 2778 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2779 NULL, 2780 NULL, 2781 NULL, 2782 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2783 MatGetRowMinAbs_MPIAIJ, 2784 NULL, 2785 NULL, 2786 NULL, 2787 NULL, 2788 /*75*/ MatFDColoringApply_AIJ, 2789 MatSetFromOptions_MPIAIJ, 2790 NULL, 2791 NULL, 2792 MatFindZeroDiagonals_MPIAIJ, 2793 /*80*/ NULL, 2794 NULL, 2795 NULL, 2796 /*83*/ MatLoad_MPIAIJ, 2797 NULL, 2798 NULL, 2799 NULL, 2800 NULL, 2801 NULL, 2802 /*89*/ NULL, 2803 NULL, 2804 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2805 NULL, 2806 NULL, 2807 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2808 NULL, 2809 NULL, 2810 
NULL, 2811 MatBindToCPU_MPIAIJ, 2812 /*99*/ MatProductSetFromOptions_MPIAIJ, 2813 NULL, 2814 NULL, 2815 MatConjugate_MPIAIJ, 2816 NULL, 2817 /*104*/ MatSetValuesRow_MPIAIJ, 2818 MatRealPart_MPIAIJ, 2819 MatImaginaryPart_MPIAIJ, 2820 NULL, 2821 NULL, 2822 /*109*/ NULL, 2823 NULL, 2824 MatGetRowMin_MPIAIJ, 2825 NULL, 2826 MatMissingDiagonal_MPIAIJ, 2827 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2828 NULL, 2829 MatGetGhosts_MPIAIJ, 2830 NULL, 2831 NULL, 2832 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2833 NULL, 2834 NULL, 2835 NULL, 2836 MatGetMultiProcBlock_MPIAIJ, 2837 /*124*/ MatFindNonzeroRows_MPIAIJ, 2838 MatGetColumnReductions_MPIAIJ, 2839 MatInvertBlockDiagonal_MPIAIJ, 2840 MatInvertVariableBlockDiagonal_MPIAIJ, 2841 MatCreateSubMatricesMPI_MPIAIJ, 2842 /*129*/ NULL, 2843 NULL, 2844 NULL, 2845 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2846 NULL, 2847 /*134*/ NULL, 2848 NULL, 2849 NULL, 2850 NULL, 2851 NULL, 2852 /*139*/ MatSetBlockSizes_MPIAIJ, 2853 NULL, 2854 NULL, 2855 MatFDColoringSetUp_MPIXAIJ, 2856 MatFindOffBlockDiagonalEntries_MPIAIJ, 2857 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2858 /*145*/ NULL, 2859 NULL, 2860 NULL, 2861 MatCreateGraph_Simple_AIJ, 2862 NULL, 2863 /*150*/ NULL, 2864 MatEliminateZeros_MPIAIJ, 2865 MatGetRowSumAbs_MPIAIJ}; 2866 2867 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2868 { 2869 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2870 2871 PetscFunctionBegin; 2872 PetscCall(MatStoreValues(aij->A)); 2873 PetscCall(MatStoreValues(aij->B)); 2874 PetscFunctionReturn(PETSC_SUCCESS); 2875 } 2876 2877 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2878 { 2879 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2880 2881 PetscFunctionBegin; 2882 PetscCall(MatRetrieveValues(aij->A)); 2883 PetscCall(MatRetrieveValues(aij->B)); 2884 PetscFunctionReturn(PETSC_SUCCESS); 2885 } 2886 2887 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2888 { 2889 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2890 PetscMPIInt size; 2891 2892 PetscFunctionBegin; 2893 if (B->hash_active) { 2894 B->ops[0] = b->cops; 2895 B->hash_active = PETSC_FALSE; 2896 } 2897 PetscCall(PetscLayoutSetUp(B->rmap)); 2898 PetscCall(PetscLayoutSetUp(B->cmap)); 2899 2900 #if defined(PETSC_USE_CTABLE) 2901 PetscCall(PetscHMapIDestroy(&b->colmap)); 2902 #else 2903 PetscCall(PetscFree(b->colmap)); 2904 #endif 2905 PetscCall(PetscFree(b->garray)); 2906 PetscCall(VecDestroy(&b->lvec)); 2907 PetscCall(VecScatterDestroy(&b->Mvctx)); 2908 2909 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2910 2911 MatSeqXAIJGetOptions_Private(b->B); 2912 PetscCall(MatDestroy(&b->B)); 2913 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2914 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2915 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2916 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2917 MatSeqXAIJRestoreOptions_Private(b->B); 2918 2919 MatSeqXAIJGetOptions_Private(b->A); 2920 PetscCall(MatDestroy(&b->A)); 2921 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2922 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2923 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2924 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2925 MatSeqXAIJRestoreOptions_Private(b->A); 2926 2927 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2928 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2929 B->preallocated = PETSC_TRUE; 2930 B->was_assembled = PETSC_FALSE; 2931 B->assembled = PETSC_FALSE; 2932 PetscFunctionReturn(PETSC_SUCCESS); 2933 } 2934 2935 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2936 { 2937 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2938 2939 PetscFunctionBegin; 2940 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2941 PetscCall(PetscLayoutSetUp(B->rmap)); 2942 PetscCall(PetscLayoutSetUp(B->cmap)); 2943 2944 #if defined(PETSC_USE_CTABLE) 2945 PetscCall(PetscHMapIDestroy(&b->colmap)); 2946 #else 2947 PetscCall(PetscFree(b->colmap)); 2948 #endif 2949 PetscCall(PetscFree(b->garray)); 2950 PetscCall(VecDestroy(&b->lvec)); 2951 PetscCall(VecScatterDestroy(&b->Mvctx)); 2952 2953 PetscCall(MatResetPreallocation(b->A)); 2954 PetscCall(MatResetPreallocation(b->B)); 2955 B->preallocated = PETSC_TRUE; 2956 B->was_assembled = PETSC_FALSE; 2957 B->assembled = PETSC_FALSE; 2958 PetscFunctionReturn(PETSC_SUCCESS); 2959 } 2960 2961 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2962 { 2963 Mat mat; 2964 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2965 2966 PetscFunctionBegin; 2967 *newmat = NULL; 2968 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2969 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2970 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2971 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2972 a = (Mat_MPIAIJ *)mat->data; 2973 2974 mat->factortype = matin->factortype; 2975 mat->assembled = matin->assembled; 2976 mat->insertmode = NOT_SET_VALUES; 2977 2978 a->size = oldmat->size; 2979 a->rank = oldmat->rank; 2980 a->donotstash = oldmat->donotstash; 2981 a->roworiented = oldmat->roworiented; 2982 a->rowindices = NULL; 2983 a->rowvalues = NULL; 2984 a->getrowactive = PETSC_FALSE; 2985 2986 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2987 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2988 if (matin->hash_active) { 2989 PetscCall(MatSetUp(mat)); 2990 } else { 2991 mat->preallocated = matin->preallocated; 2992 if (oldmat->colmap) { 2993 #if defined(PETSC_USE_CTABLE) 2994 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2995 #else 2996 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 2997 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 2998 #endif 2999 } else a->colmap = NULL; 3000 if (oldmat->garray) { 3001 PetscInt len; 3002 len = oldmat->B->cmap->n; 3003 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3004 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3005 } else a->garray = NULL; 3006 3007 /* It may happen MatDuplicate is called with a non-assembled matrix 3008 In fact, MatDuplicate only requires the matrix to be preallocated 3009 This may happen inside a DMCreateMatrix_Shell */ 3010 if (oldmat->lvec) 
PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3011 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3012 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3013 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3014 } 3015 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3016 *newmat = mat; 3017 PetscFunctionReturn(PETSC_SUCCESS); 3018 } 3019 3020 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3021 { 3022 PetscBool isbinary, ishdf5; 3023 3024 PetscFunctionBegin; 3025 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3026 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3027 /* force binary viewer to load .info file if it has not yet done so */ 3028 PetscCall(PetscViewerSetUp(viewer)); 3029 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3030 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3031 if (isbinary) { 3032 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3033 } else if (ishdf5) { 3034 #if defined(PETSC_HAVE_HDF5) 3035 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3036 #else 3037 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3038 #endif 3039 } else { 3040 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3041 } 3042 PetscFunctionReturn(PETSC_SUCCESS); 3043 } 3044 3045 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3046 { 3047 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3048 PetscInt *rowidxs, *colidxs; 3049 PetscScalar *matvals; 3050 3051 PetscFunctionBegin; 3052 PetscCall(PetscViewerSetUp(viewer)); 3053 3054 /* read in matrix header */ 3055 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3056 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3057 M = header[1]; 3058 N = header[2]; 3059 nz = header[3]; 3060 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3061 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3062 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3063 3064 /* set block sizes from the viewer's .info file */ 3065 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3066 /* set global sizes if not set already */ 3067 if (mat->rmap->N < 0) mat->rmap->N = M; 3068 if (mat->cmap->N < 0) mat->cmap->N = N; 3069 PetscCall(PetscLayoutSetUp(mat->rmap)); 3070 PetscCall(PetscLayoutSetUp(mat->cmap)); 3071 3072 /* check if the matrix sizes are correct */ 3073 PetscCall(MatGetSize(mat, &rows, &cols)); 3074 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3075 3076 /* read in row lengths and build row indices */ 3077 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3078 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3079 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, 
PETSC_INT)); 3080 rowidxs[0] = 0; 3081 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3082 if (nz != PETSC_MAX_INT) { 3083 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3084 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3085 } 3086 3087 /* read in column indices and matrix values */ 3088 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3089 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3090 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3091 /* store matrix indices and values */ 3092 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3093 PetscCall(PetscFree(rowidxs)); 3094 PetscCall(PetscFree2(colidxs, matvals)); 3095 PetscFunctionReturn(PETSC_SUCCESS); 3096 } 3097 3098 /* Not scalable because of ISAllGather() unless getting all columns. */ 3099 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3100 { 3101 IS iscol_local; 3102 PetscBool isstride; 3103 PetscMPIInt lisstride = 0, gisstride; 3104 3105 PetscFunctionBegin; 3106 /* check if we are grabbing all columns*/ 3107 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3108 3109 if (isstride) { 3110 PetscInt start, len, mstart, mlen; 3111 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3112 PetscCall(ISGetLocalSize(iscol, &len)); 3113 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3114 if (mstart == start && mlen - mstart == len) lisstride = 1; 3115 } 3116 3117 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3118 if (gisstride) { 3119 PetscInt N; 3120 PetscCall(MatGetSize(mat, NULL, &N)); 3121 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3122 PetscCall(ISSetIdentity(iscol_local)); 3123 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3124 } else { 3125 PetscInt cbs; 3126 PetscCall(ISGetBlockSize(iscol, &cbs)); 3127 PetscCall(ISAllGather(iscol, &iscol_local)); 3128 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3129 } 3130 3131 *isseq = iscol_local; 3132 PetscFunctionReturn(PETSC_SUCCESS); 3133 } 3134 3135 /* 3136 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3137 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3138 3139 Input Parameters: 3140 + mat - matrix 3141 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3142 i.e., mat->rstart <= isrow[i] < mat->rend 3143 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3144 i.e., mat->cstart <= iscol[i] < mat->cend 3145 3146 Output Parameters: 3147 + isrow_d - sequential row index set for retrieving mat->A 3148 . iscol_d - sequential column index set for retrieving mat->A 3149 . 
iscol_o - sequential column index set for retrieving mat->B 3150 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3151 */ 3152 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3153 { 3154 Vec x, cmap; 3155 const PetscInt *is_idx; 3156 PetscScalar *xarray, *cmaparray; 3157 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3158 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3159 Mat B = a->B; 3160 Vec lvec = a->lvec, lcmap; 3161 PetscInt i, cstart, cend, Bn = B->cmap->N; 3162 MPI_Comm comm; 3163 VecScatter Mvctx = a->Mvctx; 3164 3165 PetscFunctionBegin; 3166 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3167 PetscCall(ISGetLocalSize(iscol, &ncols)); 3168 3169 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3170 PetscCall(MatCreateVecs(mat, &x, NULL)); 3171 PetscCall(VecSet(x, -1.0)); 3172 PetscCall(VecDuplicate(x, &cmap)); 3173 PetscCall(VecSet(cmap, -1.0)); 3174 3175 /* Get start indices */ 3176 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3177 isstart -= ncols; 3178 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3179 3180 PetscCall(ISGetIndices(iscol, &is_idx)); 3181 PetscCall(VecGetArray(x, &xarray)); 3182 PetscCall(VecGetArray(cmap, &cmaparray)); 3183 PetscCall(PetscMalloc1(ncols, &idx)); 3184 for (i = 0; i < ncols; i++) { 3185 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3186 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3187 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3188 } 3189 PetscCall(VecRestoreArray(x, &xarray)); 3190 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3191 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3192 3193 /* Get iscol_d */ 3194 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3195 PetscCall(ISGetBlockSize(iscol, &i)); 3196 PetscCall(ISSetBlockSize(*iscol_d, i)); 3197 3198 /* Get isrow_d */ 3199 PetscCall(ISGetLocalSize(isrow, &m)); 3200 rstart = mat->rmap->rstart; 3201 PetscCall(PetscMalloc1(m, &idx)); 3202 PetscCall(ISGetIndices(isrow, &is_idx)); 3203 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3204 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3205 3206 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3207 PetscCall(ISGetBlockSize(isrow, &i)); 3208 PetscCall(ISSetBlockSize(*isrow_d, i)); 3209 3210 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3211 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3212 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3213 3214 PetscCall(VecDuplicate(lvec, &lcmap)); 3215 3216 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3217 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3218 3219 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3220 /* off-process column indices */ 3221 count = 0; 3222 PetscCall(PetscMalloc1(Bn, &idx)); 3223 PetscCall(PetscMalloc1(Bn, &cmap1)); 3224 3225 PetscCall(VecGetArray(lvec, &xarray)); 3226 PetscCall(VecGetArray(lcmap, &cmaparray)); 3227 for (i = 0; i < Bn; i++) { 3228 if (PetscRealPart(xarray[i]) > -1.0) { 3229 idx[count] = i; /* local column index in off-diagonal part B */ 3230 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3231 
count++; 3232 } 3233 } 3234 PetscCall(VecRestoreArray(lvec, &xarray)); 3235 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3236 3237 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3238 /* cannot ensure iscol_o has same blocksize as iscol! */ 3239 3240 PetscCall(PetscFree(idx)); 3241 *garray = cmap1; 3242 3243 PetscCall(VecDestroy(&x)); 3244 PetscCall(VecDestroy(&cmap)); 3245 PetscCall(VecDestroy(&lcmap)); 3246 PetscFunctionReturn(PETSC_SUCCESS); 3247 } 3248 3249 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3250 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3251 { 3252 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3253 Mat M = NULL; 3254 MPI_Comm comm; 3255 IS iscol_d, isrow_d, iscol_o; 3256 Mat Asub = NULL, Bsub = NULL; 3257 PetscInt n; 3258 3259 PetscFunctionBegin; 3260 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3261 3262 if (call == MAT_REUSE_MATRIX) { 3263 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3264 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3265 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3266 3267 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3268 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3269 3270 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3271 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3272 3273 /* Update diagonal and off-diagonal portions of submat */ 3274 asub = (Mat_MPIAIJ *)(*submat)->data; 3275 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3276 PetscCall(ISGetLocalSize(iscol_o, &n)); 3277 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3278 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3279 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3280 3281 } else { /* call == MAT_INITIAL_MATRIX) */ 3282 const PetscInt *garray; 3283 PetscInt BsubN; 3284 3285 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3286 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3287 3288 /* Create local submatrices Asub and Bsub */ 3289 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3290 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3291 3292 /* Create submatrix M */ 3293 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3294 3295 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3296 asub = (Mat_MPIAIJ *)M->data; 3297 3298 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3299 n = asub->B->cmap->N; 3300 if (BsubN > n) { 3301 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3302 const PetscInt *idx; 3303 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3304 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3305 3306 PetscCall(PetscMalloc1(n, &idx_new)); 3307 j = 0; 3308 PetscCall(ISGetIndices(iscol_o, &idx)); 3309 for (i = 0; i < n; i++) { 3310 if (j >= BsubN) break; 3311 while (subgarray[i] > garray[j]) j++; 3312 3313 if (subgarray[i] == garray[j]) { 3314 idx_new[i] = idx[j++]; 3315 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3316 } 3317 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3318 3319 PetscCall(ISDestroy(&iscol_o)); 3320 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3321 3322 } else if (BsubN < n) { 3323 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3324 } 3325 3326 PetscCall(PetscFree(garray)); 3327 *submat = M; 3328 3329 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3330 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3331 PetscCall(ISDestroy(&isrow_d)); 3332 3333 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3334 PetscCall(ISDestroy(&iscol_d)); 3335 3336 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3337 PetscCall(ISDestroy(&iscol_o)); 3338 } 3339 PetscFunctionReturn(PETSC_SUCCESS); 3340 } 3341 3342 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3343 { 3344 IS iscol_local = NULL, isrow_d; 3345 PetscInt csize; 3346 PetscInt n, i, j, start, end; 3347 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3348 MPI_Comm comm; 3349 3350 PetscFunctionBegin; 3351 /* If isrow has same processor distribution as mat, 3352 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3353 if (call == MAT_REUSE_MATRIX) { 3354 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3355 if (isrow_d) { 3356 sameRowDist = PETSC_TRUE; 3357 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3358 } else { 3359 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3360 if (iscol_local) { 3361 sameRowDist = PETSC_TRUE; 3362 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3363 } 3364 } 3365 } else { 3366 /* Check if isrow has same processor distribution as mat */ 3367 sameDist[0] = PETSC_FALSE; 3368 
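    /* Since ownership ranges are contiguous, it suffices to check that the smallest and largest
       locally owned indices of isrow fall inside [start, end); an empty local IS trivially matches */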
PetscCall(ISGetLocalSize(isrow, &n)); 3369 if (!n) { 3370 sameDist[0] = PETSC_TRUE; 3371 } else { 3372 PetscCall(ISGetMinMax(isrow, &i, &j)); 3373 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3374 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3375 } 3376 3377 /* Check if iscol has same processor distribution as mat */ 3378 sameDist[1] = PETSC_FALSE; 3379 PetscCall(ISGetLocalSize(iscol, &n)); 3380 if (!n) { 3381 sameDist[1] = PETSC_TRUE; 3382 } else { 3383 PetscCall(ISGetMinMax(iscol, &i, &j)); 3384 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3385 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3386 } 3387 3388 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3389 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3390 sameRowDist = tsameDist[0]; 3391 } 3392 3393 if (sameRowDist) { 3394 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3395 /* isrow and iscol have same processor distribution as mat */ 3396 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3397 PetscFunctionReturn(PETSC_SUCCESS); 3398 } else { /* sameRowDist */ 3399 /* isrow has same processor distribution as mat */ 3400 if (call == MAT_INITIAL_MATRIX) { 3401 PetscBool sorted; 3402 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3403 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3404 PetscCall(ISGetSize(iscol, &i)); 3405 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3406 3407 PetscCall(ISSorted(iscol_local, &sorted)); 3408 if (sorted) { 3409 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3410 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3411 PetscFunctionReturn(PETSC_SUCCESS); 3412 } 3413 } else { /* call == MAT_REUSE_MATRIX */ 3414 IS iscol_sub; 3415 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3416 if (iscol_sub) { 3417 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3418 PetscFunctionReturn(PETSC_SUCCESS); 3419 } 3420 } 3421 } 3422 } 3423 3424 /* General case: iscol -> iscol_local which has global size of iscol */ 3425 if (call == MAT_REUSE_MATRIX) { 3426 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3427 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3428 } else { 3429 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3430 } 3431 3432 PetscCall(ISGetLocalSize(iscol, &csize)); 3433 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3434 3435 if (call == MAT_INITIAL_MATRIX) { 3436 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3437 PetscCall(ISDestroy(&iscol_local)); 3438 } 3439 PetscFunctionReturn(PETSC_SUCCESS); 3440 } 3441 3442 /*@C 3443 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3444 and "off-diagonal" part of the matrix in CSR format. 3445 3446 Collective 3447 3448 Input Parameters: 3449 + comm - MPI communicator 3450 . A - "diagonal" portion of matrix 3451 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3452 - garray - global index of `B` columns 3453 3454 Output Parameter: 3455 . mat - the matrix, with input `A` as its local diagonal matrix 3456 3457 Level: advanced 3458 3459 Notes: 3460 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3461 3462 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3463 3464 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3465 @*/ 3466 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3467 { 3468 Mat_MPIAIJ *maij; 3469 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3470 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3471 const PetscScalar *oa; 3472 Mat Bnew; 3473 PetscInt m, n, N; 3474 MatType mpi_mat_type; 3475 3476 PetscFunctionBegin; 3477 PetscCall(MatCreate(comm, mat)); 3478 PetscCall(MatGetSize(A, &m, &n)); 3479 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3480 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3481 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3482 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3483 3484 /* Get global columns of mat */ 3485 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3486 3487 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3488 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3489 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3490 PetscCall(MatSetType(*mat, mpi_mat_type)); 3491 3492 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3493 maij = (Mat_MPIAIJ *)(*mat)->data; 3494 3495 (*mat)->preallocated = PETSC_TRUE; 3496 3497 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3498 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3499 3500 /* Set A as diagonal portion of *mat */ 3501 maij->A = A; 3502 3503 nz = oi[m]; 3504 for (i = 0; i < nz; i++) { 3505 col = oj[i]; 3506 oj[i] = garray[col]; 3507 } 3508 3509 /* Set Bnew as off-diagonal portion of *mat */ 3510 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3511 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3512 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3513 bnew = (Mat_SeqAIJ *)Bnew->data; 3514 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3515 maij->B = Bnew; 3516 3517 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3518 3519 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3520 b->free_a = PETSC_FALSE; 3521 b->free_ij = PETSC_FALSE; 3522 PetscCall(MatDestroy(&B)); 3523 3524 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3525 bnew->free_a = PETSC_TRUE; 3526 bnew->free_ij = PETSC_TRUE; 3527 3528 /* condense columns of maij->B */ 3529 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3530 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3531 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3532 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3533 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3534 PetscFunctionReturn(PETSC_SUCCESS); 3535 } 3536 3537 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3538 3539 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3540 { 3541 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3542 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3543 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3544 Mat M, Msub, B = a->B; 3545 MatScalar *aa; 3546 Mat_SeqAIJ *aij; 3547 PetscInt *garray = a->garray, *colsub, Ncols; 3548 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3549 IS iscol_sub, iscmap; 3550 const PetscInt *is_idx, *cmap; 3551 PetscBool allcolumns = PETSC_FALSE; 3552 MPI_Comm comm; 3553 3554 PetscFunctionBegin; 3555 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3556 if (call == MAT_REUSE_MATRIX) { 3557 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3558 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3559 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3560 3561 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3562 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3563 3564 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3565 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3566 3567 
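    /* All cached objects were recovered, so extract the same local rows and columns again,
       reusing the nonzero structure of the cached sequential matrix Msub and overwriting its values */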
PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3568 3569 } else { /* call == MAT_INITIAL_MATRIX) */ 3570 PetscBool flg; 3571 3572 PetscCall(ISGetLocalSize(iscol, &n)); 3573 PetscCall(ISGetSize(iscol, &Ncols)); 3574 3575 /* (1) iscol -> nonscalable iscol_local */ 3576 /* Check for special case: each processor gets entire matrix columns */ 3577 PetscCall(ISIdentity(iscol_local, &flg)); 3578 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3579 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3580 if (allcolumns) { 3581 iscol_sub = iscol_local; 3582 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3583 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3584 3585 } else { 3586 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3587 PetscInt *idx, *cmap1, k; 3588 PetscCall(PetscMalloc1(Ncols, &idx)); 3589 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3590 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3591 count = 0; 3592 k = 0; 3593 for (i = 0; i < Ncols; i++) { 3594 j = is_idx[i]; 3595 if (j >= cstart && j < cend) { 3596 /* diagonal part of mat */ 3597 idx[count] = j; 3598 cmap1[count++] = i; /* column index in submat */ 3599 } else if (Bn) { 3600 /* off-diagonal part of mat */ 3601 if (j == garray[k]) { 3602 idx[count] = j; 3603 cmap1[count++] = i; /* column index in submat */ 3604 } else if (j > garray[k]) { 3605 while (j > garray[k] && k < Bn - 1) k++; 3606 if (j == garray[k]) { 3607 idx[count] = j; 3608 cmap1[count++] = i; /* column index in submat */ 3609 } 3610 } 3611 } 3612 } 3613 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3614 3615 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3616 PetscCall(ISGetBlockSize(iscol, &cbs)); 3617 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3618 3619 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3620 } 3621 3622 /* (3) Create sequential Msub */ 3623 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3624 } 3625 3626 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3627 aij = (Mat_SeqAIJ *)(Msub)->data; 3628 ii = aij->i; 3629 PetscCall(ISGetIndices(iscmap, &cmap)); 3630 3631 /* 3632 m - number of local rows 3633 Ncols - number of columns (same on all processors) 3634 rstart - first row in new global matrix generated 3635 */ 3636 PetscCall(MatGetSize(Msub, &m, NULL)); 3637 3638 if (call == MAT_INITIAL_MATRIX) { 3639 /* (4) Create parallel newmat */ 3640 PetscMPIInt rank, size; 3641 PetscInt csize; 3642 3643 PetscCallMPI(MPI_Comm_size(comm, &size)); 3644 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3645 3646 /* 3647 Determine the number of non-zeros in the diagonal and off-diagonal 3648 portions of the matrix in order to do correct preallocation 3649 */ 3650 3651 /* first get start and end of "diagonal" columns */ 3652 PetscCall(ISGetLocalSize(iscol, &csize)); 3653 if (csize == PETSC_DECIDE) { 3654 PetscCall(ISGetSize(isrow, &mglobal)); 3655 if (mglobal == Ncols) { /* square matrix */ 3656 nlocal = m; 3657 } else { 3658 nlocal = Ncols / size + ((Ncols % size) > rank); 3659 } 3660 } else { 3661 nlocal = csize; 3662 } 3663 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3664 rstart = rend - nlocal; 3665 PetscCheck(rank != size - 1 
|| rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3666 3667 /* next, compute all the lengths */ 3668 jj = aij->j; 3669 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3670 olens = dlens + m; 3671 for (i = 0; i < m; i++) { 3672 jend = ii[i + 1] - ii[i]; 3673 olen = 0; 3674 dlen = 0; 3675 for (j = 0; j < jend; j++) { 3676 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3677 else dlen++; 3678 jj++; 3679 } 3680 olens[i] = olen; 3681 dlens[i] = dlen; 3682 } 3683 3684 PetscCall(ISGetBlockSize(isrow, &bs)); 3685 PetscCall(ISGetBlockSize(iscol, &cbs)); 3686 3687 PetscCall(MatCreate(comm, &M)); 3688 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3689 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3690 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3691 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3692 PetscCall(PetscFree(dlens)); 3693 3694 } else { /* call == MAT_REUSE_MATRIX */ 3695 M = *newmat; 3696 PetscCall(MatGetLocalSize(M, &i, NULL)); 3697 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3698 PetscCall(MatZeroEntries(M)); 3699 /* 3700 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3701 rather than the slower MatSetValues(). 3702 */ 3703 M->was_assembled = PETSC_TRUE; 3704 M->assembled = PETSC_FALSE; 3705 } 3706 3707 /* (5) Set values of Msub to *newmat */ 3708 PetscCall(PetscMalloc1(count, &colsub)); 3709 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3710 3711 jj = aij->j; 3712 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3713 for (i = 0; i < m; i++) { 3714 row = rstart + i; 3715 nz = ii[i + 1] - ii[i]; 3716 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3717 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3718 jj += nz; 3719 aa += nz; 3720 } 3721 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3722 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3723 3724 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3725 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3726 3727 PetscCall(PetscFree(colsub)); 3728 3729 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3730 if (call == MAT_INITIAL_MATRIX) { 3731 *newmat = M; 3732 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3733 PetscCall(MatDestroy(&Msub)); 3734 3735 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3736 PetscCall(ISDestroy(&iscol_sub)); 3737 3738 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3739 PetscCall(ISDestroy(&iscmap)); 3740 3741 if (iscol_local) { 3742 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3743 PetscCall(ISDestroy(&iscol_local)); 3744 } 3745 } 3746 PetscFunctionReturn(PETSC_SUCCESS); 3747 } 3748 3749 /* 3750 Not great since it makes two copies of the submatrix, first an SeqAIJ 3751 in local and then by concatenating the local matrices the end result. 3752 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3753 3754 This requires a sequential iscol with all indices. 
3755 */ 3756 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3757 { 3758 PetscMPIInt rank, size; 3759 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3760 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3761 Mat M, Mreuse; 3762 MatScalar *aa, *vwork; 3763 MPI_Comm comm; 3764 Mat_SeqAIJ *aij; 3765 PetscBool colflag, allcolumns = PETSC_FALSE; 3766 3767 PetscFunctionBegin; 3768 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3769 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3770 PetscCallMPI(MPI_Comm_size(comm, &size)); 3771 3772 /* Check for special case: each processor gets entire matrix columns */ 3773 PetscCall(ISIdentity(iscol, &colflag)); 3774 PetscCall(ISGetLocalSize(iscol, &n)); 3775 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3776 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3777 3778 if (call == MAT_REUSE_MATRIX) { 3779 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3780 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3781 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3782 } else { 3783 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3784 } 3785 3786 /* 3787 m - number of local rows 3788 n - number of columns (same on all processors) 3789 rstart - first row in new global matrix generated 3790 */ 3791 PetscCall(MatGetSize(Mreuse, &m, &n)); 3792 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3793 if (call == MAT_INITIAL_MATRIX) { 3794 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3795 ii = aij->i; 3796 jj = aij->j; 3797 3798 /* 3799 Determine the number of non-zeros in the diagonal and off-diagonal 3800 portions of the matrix in order to do correct preallocation 3801 */ 3802 3803 /* first get start and end of "diagonal" columns */ 3804 if (csize == PETSC_DECIDE) { 3805 PetscCall(ISGetSize(isrow, &mglobal)); 3806 if (mglobal == n) { /* square matrix */ 3807 nlocal = m; 3808 } else { 3809 nlocal = n / size + ((n % size) > rank); 3810 } 3811 } else { 3812 nlocal = csize; 3813 } 3814 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3815 rstart = rend - nlocal; 3816 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3817 3818 /* next, compute all the lengths */ 3819 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3820 olens = dlens + m; 3821 for (i = 0; i < m; i++) { 3822 jend = ii[i + 1] - ii[i]; 3823 olen = 0; 3824 dlen = 0; 3825 for (j = 0; j < jend; j++) { 3826 if (*jj < rstart || *jj >= rend) olen++; 3827 else dlen++; 3828 jj++; 3829 } 3830 olens[i] = olen; 3831 dlens[i] = dlen; 3832 } 3833 PetscCall(MatCreate(comm, &M)); 3834 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3835 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3836 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3837 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3838 PetscCall(PetscFree(dlens)); 3839 } else { 3840 PetscInt ml, nl; 3841 3842 M = *newmat; 3843 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3844 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3845 PetscCall(MatZeroEntries(M)); 3846 /* 3847 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3848 rather than the slower MatSetValues(). 3849 */ 3850 M->was_assembled = PETSC_TRUE; 3851 M->assembled = PETSC_FALSE; 3852 } 3853 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3854 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3855 ii = aij->i; 3856 jj = aij->j; 3857 3858 /* trigger copy to CPU if needed */ 3859 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3860 for (i = 0; i < m; i++) { 3861 row = rstart + i; 3862 nz = ii[i + 1] - ii[i]; 3863 cwork = jj; 3864 jj = PetscSafePointerPlusOffset(jj, nz); 3865 vwork = aa; 3866 aa = PetscSafePointerPlusOffset(aa, nz); 3867 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3868 } 3869 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3870 3871 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3872 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3873 *newmat = M; 3874 3875 /* save submatrix used in processor for next request */ 3876 if (call == MAT_INITIAL_MATRIX) { 3877 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3878 PetscCall(MatDestroy(&Mreuse)); 3879 } 3880 PetscFunctionReturn(PETSC_SUCCESS); 3881 } 3882 3883 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3884 { 3885 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3886 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3887 const PetscInt *JJ; 3888 PetscBool nooffprocentries; 3889 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3890 3891 PetscFunctionBegin; 3892 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3893 3894 PetscCall(PetscLayoutSetUp(B->rmap)); 3895 PetscCall(PetscLayoutSetUp(B->cmap)); 3896 m = B->rmap->n; 3897 cstart = B->cmap->rstart; 3898 cend = B->cmap->rend; 3899 rstart = B->rmap->rstart; 3900 3901 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3902 3903 if (PetscDefined(USE_DEBUG)) { 3904 for (i = 0; i < m; i++) { 3905 nnz = Ii[i + 1] - Ii[i]; 3906 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3907 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3908 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3909 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3910 } 3911 } 3912 3913 for (i = 0; i < m; i++) { 3914 nnz = Ii[i + 1] - Ii[i]; 3915 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3916 nnz_max = PetscMax(nnz_max, nnz); 3917 d = 0; 3918 for (j = 0; j < nnz; j++) { 3919 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3920 } 3921 d_nnz[i] = d; 3922 o_nnz[i] = nnz - d; 3923 } 3924 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3925 PetscCall(PetscFree2(d_nnz, o_nnz)); 3926 3927 for (i = 0; i < m; i++) { 3928 ii = i + rstart; 3929 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES)); 3930 } 3931 nooffprocentries = B->nooffprocentries; 3932 B->nooffprocentries = PETSC_TRUE; 3933 
PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3934 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3935 B->nooffprocentries = nooffprocentries; 3936 3937 /* count number of entries below block diagonal */ 3938 PetscCall(PetscFree(Aij->ld)); 3939 PetscCall(PetscCalloc1(m, &ld)); 3940 Aij->ld = ld; 3941 for (i = 0; i < m; i++) { 3942 nnz = Ii[i + 1] - Ii[i]; 3943 j = 0; 3944 while (j < nnz && J[j] < cstart) j++; 3945 ld[i] = j; 3946 if (J) J += nnz; 3947 } 3948 3949 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3950 PetscFunctionReturn(PETSC_SUCCESS); 3951 } 3952 3953 /*@ 3954 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3955 (the default parallel PETSc format). 3956 3957 Collective 3958 3959 Input Parameters: 3960 + B - the matrix 3961 . i - the indices into `j` for the start of each local row (indices start with zero) 3962 . j - the column indices for each local row (indices start with zero) 3963 - v - optional values in the matrix 3964 3965 Level: developer 3966 3967 Notes: 3968 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3969 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3970 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3971 3972 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3973 3974 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3975 3976 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3977 3978 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3979 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3980 3981 The format which is used for the sparse matrix input, is equivalent to a 3982 row-major ordering.. i.e for the following matrix, the input data expected is 3983 as shown 3984 .vb 3985 1 0 0 3986 2 0 3 P0 3987 ------- 3988 4 5 6 P1 3989 3990 Process0 [P0] rows_owned=[0,1] 3991 i = {0,1,3} [size = nrow+1 = 2+1] 3992 j = {0,0,2} [size = 3] 3993 v = {1,2,3} [size = 3] 3994 3995 Process1 [P1] rows_owned=[2] 3996 i = {0,3} [size = nrow+1 = 1+1] 3997 j = {0,1,2} [size = 3] 3998 v = {4,5,6} [size = 3] 3999 .ve 4000 4001 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4002 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4003 @*/ 4004 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4005 { 4006 PetscFunctionBegin; 4007 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4008 PetscFunctionReturn(PETSC_SUCCESS); 4009 } 4010 4011 /*@C 4012 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4013 (the default parallel PETSc format). For good matrix assembly performance 4014 the user should preallocate the matrix storage by setting the parameters 4015 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4016 4017 Collective 4018 4019 Input Parameters: 4020 + B - the matrix 4021 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4022 (same value is used for all local rows) 4023 . d_nnz - array containing the number of nonzeros in the various rows of the 4024 DIAGONAL portion of the local submatrix (possibly different for each row) 4025 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4026 The size of this array is equal to the number of local rows, i.e. 'm'. 4027 For matrices that will be factored, you must leave room for (and set) 4028 the diagonal entry even if it is zero. 4029 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4030 submatrix (same value is used for all local rows). 4031 - o_nnz - array containing the number of nonzeros in the various rows of the 4032 OFF-DIAGONAL portion of the local submatrix (possibly different for 4033 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4034 structure. The size of this array is equal to the number 4035 of local rows, i.e. 'm'. 4036 4037 Example Usage: 4038 Consider the following 8x8 matrix with 34 non-zero values that is 4039 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4040 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4041 as follows 4042 4043 .vb 4044 1 2 0 | 0 3 0 | 0 4 4045 Proc0 0 5 6 | 7 0 0 | 8 0 4046 9 0 10 | 11 0 0 | 12 0 4047 ------------------------------------- 4048 13 0 14 | 15 16 17 | 0 0 4049 Proc1 0 18 0 | 19 20 21 | 0 0 4050 0 0 0 | 22 23 0 | 24 0 4051 ------------------------------------- 4052 Proc2 25 26 27 | 0 0 28 | 29 0 4053 30 0 0 | 31 32 33 | 0 34 4054 .ve 4055 4056 This can be represented as a collection of submatrices as 4057 .vb 4058 A B C 4059 D E F 4060 G H I 4061 .ve 4062 4063 Where the submatrices A,B,C are owned by proc0, D,E,F are 4064 owned by proc1, G,H,I are owned by proc2. 4065 4066 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4067 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4068 The 'M','N' parameters are 8,8, and have the same values on all procs. 4069 4070 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4071 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4072 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4073 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4074 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4075 matrix, and [DF] as another `MATSEQAIJ` matrix. 4076 4077 When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4078 allocated for every row of the local diagonal submatrix, and `o_nz` 4079 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4080 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per 4081 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4082 In this case, the values of `d_nz`, `o_nz` are 4083 .vb 4084 proc0 d_nz = 2, o_nz = 2 4085 proc1 d_nz = 3, o_nz = 2 4086 proc2 d_nz = 1, o_nz = 4 4087 .ve 4088 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4089 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4090 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4091 34 values. 4092 4093 When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4094 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4095 In the above case the values for `d_nnz`, `o_nnz` are 4096 .vb 4097 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4098 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4099 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4100 .ve 4101 Here the space allocated is the sum of all the above values, i.e. 34, and 4102 hence pre-allocation is perfect. 4103 4104 Level: intermediate 4105 4106 Notes: 4107 If the *_nnz parameter is given then the *_nz parameter is ignored. 4108 4109 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4110 storage. The stored row and column indices begin with zero. 4111 See [Sparse Matrices](sec_matsparse) for details. 4112 4113 The parallel matrix is partitioned such that the first m0 rows belong to 4114 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4115 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 4116 4117 The DIAGONAL portion of the local submatrix of a processor can be defined 4118 as the submatrix which is obtained by extracting the part corresponding to 4119 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4120 first row that belongs to the processor, r2 is the last row belonging to 4121 this processor, and c1-c2 is the range of indices of the local part of a 4122 vector suitable for applying the matrix to. This is an mxn matrix. In the 4123 common case of a square matrix, the row and column ranges are the same and 4124 the DIAGONAL part is also square. The remaining portion of the local 4125 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4126 4127 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4128 4129 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4130 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4131 You can also run with the option `-info` and look for messages with the string 4132 malloc in them to see if additional memory allocation was needed. 4133 4134 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4135 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4136 @*/ 4137 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4138 { 4139 PetscFunctionBegin; 4140 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4141 PetscValidType(B, 1); 4142 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4143 PetscFunctionReturn(PETSC_SUCCESS); 4144 } 4145 4146 /*@ 4147 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4148 CSR format. 4149 4150 Collective 4151 4152 Input Parameters: 4153 + comm - MPI communicator 4154 . m - number of local rows (cannot be `PETSC_DECIDE`) 4155 . n - This value should be the same as the local size used in creating the 4156 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 4157 calculated if `N` is given) For square matrices `n` is almost always `m`. 4158 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4159 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4160 .
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4161 . j - global column indices 4162 - a - optional matrix values 4163 4164 Output Parameter: 4165 . mat - the matrix 4166 4167 Level: intermediate 4168 4169 Notes: 4170 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4171 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4172 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4173 4174 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4175 4176 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4177 4178 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4179 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4180 4181 The format which is used for the sparse matrix input, is equivalent to a 4182 row-major ordering, i.e., for the following matrix, the input data expected is 4183 as shown 4184 .vb 4185 1 0 0 4186 2 0 3 P0 4187 ------- 4188 4 5 6 P1 4189 4190 Process0 [P0] rows_owned=[0,1] 4191 i = {0,1,3} [size = nrow+1 = 2+1] 4192 j = {0,0,2} [size = 3] 4193 v = {1,2,3} [size = 3] 4194 4195 Process1 [P1] rows_owned=[2] 4196 i = {0,3} [size = nrow+1 = 1+1] 4197 j = {0,1,2} [size = 3] 4198 v = {4,5,6} [size = 3] 4199 .ve 4200 4201 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4202 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4203 @*/ 4204 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4205 { 4206 PetscFunctionBegin; 4207 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4208 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4209 PetscCall(MatCreate(comm, mat)); 4210 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4211 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4212 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4213 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4214 PetscFunctionReturn(PETSC_SUCCESS); 4215 } 4216 4217 /*@ 4218 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4219 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4220 from `MatCreateMPIAIJWithArrays()` 4221 4222 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4223 4224 Collective 4225 4226 Input Parameters: 4227 + mat - the matrix 4228 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4229 . n - This value should be the same as the local size used in creating the 4230 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4231 calculated if N is given) For square matrices n is almost always m. 4232 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4233 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4234 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4235 . J - column indices 4236 - v - matrix values 4237 4238 Level: deprecated 4239 4240 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4241 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4242 @*/ 4243 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4244 { 4245 PetscInt nnz, i; 4246 PetscBool nooffprocentries; 4247 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4248 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4249 PetscScalar *ad, *ao; 4250 PetscInt ldi, Iii, md; 4251 const PetscInt *Adi = Ad->i; 4252 PetscInt *ld = Aij->ld; 4253 4254 PetscFunctionBegin; 4255 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4256 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4257 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4258 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4259 4260 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4261 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4262 4263 for (i = 0; i < m; i++) { 4264 if (PetscDefined(USE_DEBUG)) { 4265 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4266 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4267 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4268 } 4269 } 4270 nnz = Ii[i + 1] - Ii[i]; 4271 Iii = Ii[i]; 4272 ldi = ld[i]; 4273 md = Adi[i + 1] - Adi[i]; 4274 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4275 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4276 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4277 ad += md; 4278 ao += nnz - md; 4279 } 4280 nooffprocentries = mat->nooffprocentries; 4281 mat->nooffprocentries = PETSC_TRUE; 4282 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4283 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4284 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4285 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4286 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4287 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4288 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4289 mat->nooffprocentries = nooffprocentries; 4290 PetscFunctionReturn(PETSC_SUCCESS); 4291 } 4292 4293 /*@ 4294 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4295 4296 Collective 4297 4298 Input Parameters: 4299 + mat - the matrix 4300 - v - matrix values, stored by row 4301 4302 Level: intermediate 4303 4304 Notes: 4305 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4306 4307 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4308 4309 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4310 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4311 @*/ 4312 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4313 { 4314 PetscInt nnz, i, m; 4315 PetscBool nooffprocentries; 4316 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4317 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4318 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4319 PetscScalar *ad, *ao; 4320 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4321 PetscInt ldi, Iii, md; 4322 PetscInt *ld = Aij->ld; 4323 4324 PetscFunctionBegin; 4325 m = mat->rmap->n; 4326 4327 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4328 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4329 Iii = 0; 4330 for (i = 0; i < m; i++) { 4331 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4332 ldi = ld[i]; 4333 md = Adi[i + 1] - Adi[i]; 4334 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4335 ad += md; 4336 if (ao) { 4337 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4338 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4339 ao += nnz - md; 4340 } 4341 Iii += nnz; 4342 } 4343 nooffprocentries = mat->nooffprocentries; 4344 mat->nooffprocentries = PETSC_TRUE; 4345 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4346 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4347 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4348 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4349 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4350 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4351 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4352 mat->nooffprocentries = nooffprocentries; 4353 PetscFunctionReturn(PETSC_SUCCESS); 4354 } 4355 4356 /*@C 4357 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4358 (the default parallel PETSc format). For good matrix assembly performance 4359 the user should preallocate the matrix storage by setting the parameters 4360 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4361 4362 Collective 4363 4364 Input Parameters: 4365 + comm - MPI communicator 4366 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4367 This value should be the same as the local size used in creating the 4368 y vector for the matrix-vector product y = Ax. 4369 . n - This value should be the same as the local size used in creating the 4370 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4371 calculated if N is given) For square matrices n is almost always m. 4372 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4373 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4374 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4375 (same value is used for all local rows) 4376 . d_nnz - array containing the number of nonzeros in the various rows of the 4377 DIAGONAL portion of the local submatrix (possibly different for each row) 4378 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4379 The size of this array is equal to the number of local rows, i.e 'm'. 4380 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4381 submatrix (same value is used for all local rows). 4382 - o_nnz - array containing the number of nonzeros in the various rows of the 4383 OFF-DIAGONAL portion of the local submatrix (possibly different for 4384 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4385 structure. The size of this array is equal to the number 4386 of local rows, i.e., `m`. 4387 4388 Output Parameter: 4389 . A - the matrix 4390 4391 Options Database Keys: 4392 + -mat_no_inode - Do not use inodes 4393 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4394 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4395 See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4396 to be viewed as a matrix. Entry (i,j) is the size of the message (in bytes) rank i sends to rank j in one `MatMult()` call. 4397 4398 Level: intermediate 4399 4400 Notes: 4401 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4402 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4403 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4404 4405 If the *_nnz parameter is given then the *_nz parameter is ignored. 4406 4407 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4408 processors, while the `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4409 storage requirements for this matrix. 4410 4411 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4412 processor then it must be used on all processors that share the object for 4413 that argument. 4414 4415 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4416 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4417 4418 The user MUST specify either the local or global matrix dimensions 4419 (possibly both). 4420 4421 The parallel matrix is partitioned across processors such that the 4422 first `m0` rows belong to process 0, the next `m1` rows belong to 4423 process 1, the next `m2` rows belong to process 2, etc., where 4424 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. That is, each MPI process stores 4425 the values corresponding to an [m x N] submatrix. 4426 4427 The columns are logically partitioned with the first n0 columns belonging 4428 to the 0th partition, the next n1 columns belonging to the next 4429 partition, etc., where n0,n1,n2... are the input parameter `n` on each MPI process. 4430 4431 The DIAGONAL portion of the local submatrix on any given processor 4432 is the submatrix formed by the rows and columns, m and n, 4433 owned by the given processor, i.e., the diagonal submatrix on 4434 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 4435 etc. The remaining portion of the local submatrix, [m x (N-n)], 4436 constitutes the OFF-DIAGONAL portion. The example below better 4437 illustrates this concept. 4438 4439 For a square global matrix we define each processor's diagonal portion 4440 to be its local rows and the corresponding columns (a square submatrix); 4441 each processor's off-diagonal portion encompasses the remainder of the 4442 local matrix (a rectangular submatrix).
4443 4444 If `o_nnz`, `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4445 4446 When calling this routine with a single process communicator, a matrix of 4447 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4448 type of communicator, use the construction mechanism 4449 .vb 4450 MatCreate(..., &A); 4451 MatSetType(A, MATMPIAIJ); 4452 MatSetSizes(A, m, n, M, N); 4453 MatMPIAIJSetPreallocation(A, ...); 4454 .ve 4455 4456 By default, this format uses inodes (identical nodes) when possible. 4457 We search for consecutive rows with the same nonzero structure, thereby 4458 reusing matrix information to achieve increased efficiency. 4459 4460 Example Usage: 4461 Consider the following 8x8 matrix with 34 non-zero values, that is 4462 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 4463 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4464 as follows 4465 4466 .vb 4467 1 2 0 | 0 3 0 | 0 4 4468 Proc0 0 5 6 | 7 0 0 | 8 0 4469 9 0 10 | 11 0 0 | 12 0 4470 ------------------------------------- 4471 13 0 14 | 15 16 17 | 0 0 4472 Proc1 0 18 0 | 19 20 21 | 0 0 4473 0 0 0 | 22 23 0 | 24 0 4474 ------------------------------------- 4475 Proc2 25 26 27 | 0 0 28 | 29 0 4476 30 0 0 | 31 32 33 | 0 34 4477 .ve 4478 4479 This can be represented as a collection of submatrices as 4480 4481 .vb 4482 A B C 4483 D E F 4484 G H I 4485 .ve 4486 4487 where the submatrices A,B,C are owned by proc0, D,E,F are 4488 owned by proc1, and G,H,I are owned by proc2. 4489 4490 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4491 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4492 The 'M','N' parameters are 8,8, and have the same values on all procs. 4493 4494 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4495 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4496 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4497 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4498 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4499 matrix, and [DF] as another `MATSEQAIJ` matrix. 4500 4501 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4502 allocated for every row of the local diagonal submatrix, and `o_nz` 4503 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4504 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per local 4505 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4506 In this case, the values of `d_nz`,`o_nz` are 4507 .vb 4508 proc0 d_nz = 2, o_nz = 2 4509 proc1 d_nz = 3, o_nz = 2 4510 proc2 d_nz = 1, o_nz = 4 4511 .ve 4512 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4513 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4514 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4515 34 values. 4516 4517 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4518 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4519 In the above case the values for d_nnz,o_nnz are 4520 .vb 4521 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4522 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4523 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4524 .ve 4525 Here the space allocated is the sum of all the above values, i.e., 34, and 4526 hence pre-allocation is perfect.
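   With the `d_nnz`/`o_nnz` values above, the matrix of this example could be created by a call like the
   following sketch (shown for proc0, which owns the first three rows; the variable names are illustrative
   and `comm` is assumed to be the three-process communicator)
.vb
     PetscInt d_nnz[] = {2, 2, 2}, o_nnz[] = {2, 2, 2}; /* per-row counts on proc0, taken from the table above */
     Mat      A;

     MatCreateAIJ(comm, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A); /* d_nz and o_nz are ignored since d_nnz and o_nnz are given */
.ve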
4527 4528 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4529 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4530 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4531 @*/ 4532 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4533 { 4534 PetscMPIInt size; 4535 4536 PetscFunctionBegin; 4537 PetscCall(MatCreate(comm, A)); 4538 PetscCall(MatSetSizes(*A, m, n, M, N)); 4539 PetscCallMPI(MPI_Comm_size(comm, &size)); 4540 if (size > 1) { 4541 PetscCall(MatSetType(*A, MATMPIAIJ)); 4542 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4543 } else { 4544 PetscCall(MatSetType(*A, MATSEQAIJ)); 4545 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4546 } 4547 PetscFunctionReturn(PETSC_SUCCESS); 4548 } 4549 4550 /*MC 4551 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4552 4553 Synopsis: 4554 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr) 4555 4556 Not Collective 4557 4558 Input Parameter: 4559 . A - the `MATMPIAIJ` matrix 4560 4561 Output Parameters: 4562 + Ad - the diagonal portion of the matrix 4563 . Ao - the off-diagonal portion of the matrix 4564 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4565 - ierr - error code 4566 4567 Level: advanced 4568 4569 Note: 4570 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4571 4572 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4573 M*/ 4574 4575 /*MC 4576 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4577 4578 Synopsis: 4579 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr) 4580 4581 Not Collective 4582 4583 Input Parameters: 4584 + A - the `MATMPIAIJ` matrix 4585 . Ad - the diagonal portion of the matrix 4586 . Ao - the off-diagonal portion of the matrix 4587 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4588 - ierr - error code 4589 4590 Level: advanced 4591 4592 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4593 M*/ 4594 4595 /*@C 4596 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4597 4598 Not Collective 4599 4600 Input Parameter: 4601 . A - The `MATMPIAIJ` matrix 4602 4603 Output Parameters: 4604 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4605 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4606 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4607 4608 Level: intermediate 4609 4610 Note: 4611 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4612 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is 4613 the number of nonzero columns in the local off-diagonal piece of the matrix `A`.
The array colmap maps these 4614 local column numbers to global column numbers in the original matrix. 4615 4616 Fortran Notes: 4617 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4618 4619 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4620 @*/ 4621 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4622 { 4623 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4624 PetscBool flg; 4625 4626 PetscFunctionBegin; 4627 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4628 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4629 if (Ad) *Ad = a->A; 4630 if (Ao) *Ao = a->B; 4631 if (colmap) *colmap = a->garray; 4632 PetscFunctionReturn(PETSC_SUCCESS); 4633 } 4634 4635 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4636 { 4637 PetscInt m, N, i, rstart, nnz, Ii; 4638 PetscInt *indx; 4639 PetscScalar *values; 4640 MatType rootType; 4641 4642 PetscFunctionBegin; 4643 PetscCall(MatGetSize(inmat, &m, &N)); 4644 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4645 PetscInt *dnz, *onz, sum, bs, cbs; 4646 4647 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4648 /* Check sum(n) = N */ 4649 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4650 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4651 4652 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4653 rstart -= m; 4654 4655 MatPreallocateBegin(comm, m, n, dnz, onz); 4656 for (i = 0; i < m; i++) { 4657 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4658 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4659 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4660 } 4661 4662 PetscCall(MatCreate(comm, outmat)); 4663 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4664 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4665 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4666 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4667 PetscCall(MatSetType(*outmat, rootType)); 4668 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4669 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4670 MatPreallocateEnd(dnz, onz); 4671 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4672 } 4673 4674 /* numeric phase */ 4675 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4676 for (i = 0; i < m; i++) { 4677 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4678 Ii = i + rstart; 4679 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4680 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4681 } 4682 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4683 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4684 PetscFunctionReturn(PETSC_SUCCESS); 4685 } 4686 4687 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4688 { 4689 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4690 4691 PetscFunctionBegin; 4692 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4693 PetscCall(PetscFree(merge->id_r)); 4694 PetscCall(PetscFree(merge->len_s)); 4695 
PetscCall(PetscFree(merge->len_r)); 4696 PetscCall(PetscFree(merge->bi)); 4697 PetscCall(PetscFree(merge->bj)); 4698 PetscCall(PetscFree(merge->buf_ri[0])); 4699 PetscCall(PetscFree(merge->buf_ri)); 4700 PetscCall(PetscFree(merge->buf_rj[0])); 4701 PetscCall(PetscFree(merge->buf_rj)); 4702 PetscCall(PetscFree(merge->coi)); 4703 PetscCall(PetscFree(merge->coj)); 4704 PetscCall(PetscFree(merge->owners_co)); 4705 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4706 PetscCall(PetscFree(merge)); 4707 PetscFunctionReturn(PETSC_SUCCESS); 4708 } 4709 4710 #include <../src/mat/utils/freespace.h> 4711 #include <petscbt.h> 4712 4713 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4714 { 4715 MPI_Comm comm; 4716 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4717 PetscMPIInt size, rank, taga, *len_s; 4718 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4719 PetscInt proc, m; 4720 PetscInt **buf_ri, **buf_rj; 4721 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4722 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4723 MPI_Request *s_waits, *r_waits; 4724 MPI_Status *status; 4725 const MatScalar *aa, *a_a; 4726 MatScalar **abuf_r, *ba_i; 4727 Mat_Merge_SeqsToMPI *merge; 4728 PetscContainer container; 4729 4730 PetscFunctionBegin; 4731 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4732 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4733 4734 PetscCallMPI(MPI_Comm_size(comm, &size)); 4735 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4736 4737 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4738 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4739 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4740 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4741 aa = a_a; 4742 4743 bi = merge->bi; 4744 bj = merge->bj; 4745 buf_ri = merge->buf_ri; 4746 buf_rj = merge->buf_rj; 4747 4748 PetscCall(PetscMalloc1(size, &status)); 4749 owners = merge->rowmap->range; 4750 len_s = merge->len_s; 4751 4752 /* send and recv matrix values */ 4753 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4754 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4755 4756 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4757 for (proc = 0, k = 0; proc < size; proc++) { 4758 if (!len_s[proc]) continue; 4759 i = owners[proc]; 4760 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4761 k++; 4762 } 4763 4764 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4765 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4766 PetscCall(PetscFree(status)); 4767 4768 PetscCall(PetscFree(s_waits)); 4769 PetscCall(PetscFree(r_waits)); 4770 4771 /* insert mat values of mpimat */ 4772 PetscCall(PetscMalloc1(N, &ba_i)); 4773 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4774 4775 for (k = 0; k < merge->nrecv; k++) { 4776 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4777 nrows = *buf_ri_k[k]; 4778 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4779 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4780 } 4781 4782 /* set values of ba */ 4783 m = merge->rowmap->n; 4784 for (i = 0; i < m; i++) { 4785 arow = owners[rank] + i; 
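/* arow is the global index of the i-th row owned by this process; owners[rank] is the first globally numbered row of this rank */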
4786 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4787 bnzi = bi[i + 1] - bi[i]; 4788 PetscCall(PetscArrayzero(ba_i, bnzi)); 4789 4790 /* add local non-zero vals of this proc's seqmat into ba */ 4791 anzi = ai[arow + 1] - ai[arow]; 4792 aj = a->j + ai[arow]; 4793 aa = a_a + ai[arow]; 4794 nextaj = 0; 4795 for (j = 0; nextaj < anzi; j++) { 4796 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4797 ba_i[j] += aa[nextaj++]; 4798 } 4799 } 4800 4801 /* add received vals into ba */ 4802 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4803 /* i-th row */ 4804 if (i == *nextrow[k]) { 4805 anzi = *(nextai[k] + 1) - *nextai[k]; 4806 aj = buf_rj[k] + *nextai[k]; 4807 aa = abuf_r[k] + *nextai[k]; 4808 nextaj = 0; 4809 for (j = 0; nextaj < anzi; j++) { 4810 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4811 ba_i[j] += aa[nextaj++]; 4812 } 4813 } 4814 nextrow[k]++; 4815 nextai[k]++; 4816 } 4817 } 4818 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4819 } 4820 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4821 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4822 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4823 4824 PetscCall(PetscFree(abuf_r[0])); 4825 PetscCall(PetscFree(abuf_r)); 4826 PetscCall(PetscFree(ba_i)); 4827 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4828 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4829 PetscFunctionReturn(PETSC_SUCCESS); 4830 } 4831 4832 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4833 { 4834 Mat B_mpi; 4835 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4836 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4837 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4838 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4839 PetscInt len, proc, *dnz, *onz, bs, cbs; 4840 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4841 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4842 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4843 MPI_Status *status; 4844 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4845 PetscBT lnkbt; 4846 Mat_Merge_SeqsToMPI *merge; 4847 PetscContainer container; 4848 4849 PetscFunctionBegin; 4850 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4851 4852 /* make sure it is a PETSc comm */ 4853 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4854 PetscCallMPI(MPI_Comm_size(comm, &size)); 4855 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4856 4857 PetscCall(PetscNew(&merge)); 4858 PetscCall(PetscMalloc1(size, &status)); 4859 4860 /* determine row ownership */ 4861 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4862 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4863 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4864 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4865 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4866 PetscCall(PetscMalloc1(size, &len_si)); 4867 PetscCall(PetscMalloc1(size, &merge->len_s)); 4868 4869 m = merge->rowmap->n; 4870 owners = merge->rowmap->range; 4871 4872 /* determine the number of messages to send, their lengths */ 4873 len_s = merge->len_s; 4874 4875 len = 0; /* length of buf_si[] */ 4876 merge->nsend = 0; 4877 for (proc = 0; proc < size; proc++) { 4878 len_si[proc] = 0; 4879 if (proc == rank) { 4880 len_s[proc] = 0; 4881 } else { 4882 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4883 len_s[proc] = ai[owners[proc + 1]] - 
ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4884 } 4885 if (len_s[proc]) { 4886 merge->nsend++; 4887 nrows = 0; 4888 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4889 if (ai[i + 1] > ai[i]) nrows++; 4890 } 4891 len_si[proc] = 2 * (nrows + 1); 4892 len += len_si[proc]; 4893 } 4894 } 4895 4896 /* determine the number and length of messages to receive for ij-structure */ 4897 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4898 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4899 4900 /* post the Irecv of j-structure */ 4901 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4902 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4903 4904 /* post the Isend of j-structure */ 4905 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4906 4907 for (proc = 0, k = 0; proc < size; proc++) { 4908 if (!len_s[proc]) continue; 4909 i = owners[proc]; 4910 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4911 k++; 4912 } 4913 4914 /* receives and sends of j-structure are complete */ 4915 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4916 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4917 4918 /* send and recv i-structure */ 4919 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4920 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4921 4922 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4923 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4924 for (proc = 0, k = 0; proc < size; proc++) { 4925 if (!len_s[proc]) continue; 4926 /* form outgoing message for i-structure: 4927 buf_si[0]: nrows to be sent 4928 [1:nrows]: row index (global) 4929 [nrows+1:2*nrows+1]: i-structure index 4930 */ 4931 nrows = len_si[proc] / 2 - 1; 4932 buf_si_i = buf_si + nrows + 1; 4933 buf_si[0] = nrows; 4934 buf_si_i[0] = 0; 4935 nrows = 0; 4936 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4937 anzi = ai[i + 1] - ai[i]; 4938 if (anzi) { 4939 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4940 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4941 nrows++; 4942 } 4943 } 4944 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4945 k++; 4946 buf_si += len_si[proc]; 4947 } 4948 4949 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4950 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4951 4952 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4953 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4954 4955 PetscCall(PetscFree(len_si)); 4956 PetscCall(PetscFree(len_ri)); 4957 PetscCall(PetscFree(rj_waits)); 4958 PetscCall(PetscFree2(si_waits, sj_waits)); 4959 PetscCall(PetscFree(ri_waits)); 4960 PetscCall(PetscFree(buf_s)); 4961 PetscCall(PetscFree(status)); 4962 4963 /* compute a local seq matrix in each processor */ 4964 /* allocate bi array and free space for accumulating nonzero column info */ 4965 PetscCall(PetscMalloc1(m + 1, &bi)); 4966 bi[0] = 0; 4967 4968 /* create and initialize a linked list */ 4969 nlnk = N + 1; 4970 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4971 4972 /* initial FreeSpace size is 2*(num of 
local nnz(seqmat)) */ 4973 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4974 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4975 4976 current_space = free_space; 4977 4978 /* determine symbolic info for each local row */ 4979 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4980 4981 for (k = 0; k < merge->nrecv; k++) { 4982 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4983 nrows = *buf_ri_k[k]; 4984 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4985 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4986 } 4987 4988 MatPreallocateBegin(comm, m, n, dnz, onz); 4989 len = 0; 4990 for (i = 0; i < m; i++) { 4991 bnzi = 0; 4992 /* add local non-zero cols of this proc's seqmat into lnk */ 4993 arow = owners[rank] + i; 4994 anzi = ai[arow + 1] - ai[arow]; 4995 aj = a->j + ai[arow]; 4996 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4997 bnzi += nlnk; 4998 /* add received col data into lnk */ 4999 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5000 if (i == *nextrow[k]) { /* i-th row */ 5001 anzi = *(nextai[k] + 1) - *nextai[k]; 5002 aj = buf_rj[k] + *nextai[k]; 5003 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5004 bnzi += nlnk; 5005 nextrow[k]++; 5006 nextai[k]++; 5007 } 5008 } 5009 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5010 5011 /* if free space is not available, make more free space */ 5012 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space)); 5013 /* copy data into free space, then initialize lnk */ 5014 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5015 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5016 5017 current_space->array += bnzi; 5018 current_space->local_used += bnzi; 5019 current_space->local_remaining -= bnzi; 5020 5021 bi[i + 1] = bi[i] + bnzi; 5022 } 5023 5024 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5025 5026 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5027 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5028 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5029 5030 /* create symbolic parallel matrix B_mpi */ 5031 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5032 PetscCall(MatCreate(comm, &B_mpi)); 5033 if (n == PETSC_DECIDE) { 5034 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5035 } else { 5036 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5037 } 5038 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5039 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5040 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5041 MatPreallocateEnd(dnz, onz); 5042 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5043 5044 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5045 B_mpi->assembled = PETSC_FALSE; 5046 merge->bi = bi; 5047 merge->bj = bj; 5048 merge->buf_ri = buf_ri; 5049 merge->buf_rj = buf_rj; 5050 merge->coi = NULL; 5051 merge->coj = NULL; 5052 merge->owners_co = NULL; 5053 5054 PetscCall(PetscCommDestroy(&comm)); 5055 5056 /* attach the supporting struct to B_mpi for reuse */ 5057 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5058 PetscCall(PetscContainerSetPointer(container, merge)); 5059 PetscCall(PetscContainerSetUserDestroy(container,
MatDestroy_MPIAIJ_SeqsToMPI)); 5060 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5061 PetscCall(PetscContainerDestroy(&container)); 5062 *mpimat = B_mpi; 5063 5064 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5065 PetscFunctionReturn(PETSC_SUCCESS); 5066 } 5067 5068 /*@ 5069 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5070 matrices from each processor 5071 5072 Collective 5073 5074 Input Parameters: 5075 + comm - the communicators the parallel matrix will live on 5076 . seqmat - the input sequential matrices 5077 . m - number of local rows (or `PETSC_DECIDE`) 5078 . n - number of local columns (or `PETSC_DECIDE`) 5079 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5080 5081 Output Parameter: 5082 . mpimat - the parallel matrix generated 5083 5084 Level: advanced 5085 5086 Note: 5087 The dimensions of the sequential matrix in each processor MUST be the same. 5088 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5089 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5090 5091 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5092 @*/ 5093 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5094 { 5095 PetscMPIInt size; 5096 5097 PetscFunctionBegin; 5098 PetscCallMPI(MPI_Comm_size(comm, &size)); 5099 if (size == 1) { 5100 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5101 if (scall == MAT_INITIAL_MATRIX) { 5102 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5103 } else { 5104 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5105 } 5106 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5107 PetscFunctionReturn(PETSC_SUCCESS); 5108 } 5109 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5110 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5111 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5112 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5113 PetscFunctionReturn(PETSC_SUCCESS); 5114 } 5115 5116 /*@ 5117 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5118 5119 Not Collective 5120 5121 Input Parameter: 5122 . A - the matrix 5123 5124 Output Parameter: 5125 . A_loc - the local sequential matrix generated 5126 5127 Level: developer 5128 5129 Notes: 5130 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5131 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5132 `n` is the global column count obtained with `MatGetSize()` 5133 5134 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5135 5136 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
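   For example (a sketch; `A` is assumed to be an already assembled `MATAIJ` matrix)
.vb
     Mat A_loc;

     MatAIJGetLocalMat(A, &A_loc);
     /* ... use the local rows of A through A_loc ... */
.ve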
5137 5138 Destroy the matrix with `MatDestroy()` 5139 5140 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5141 @*/ 5142 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5143 { 5144 PetscBool mpi; 5145 5146 PetscFunctionBegin; 5147 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5148 if (mpi) { 5149 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5150 } else { 5151 *A_loc = A; 5152 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5153 } 5154 PetscFunctionReturn(PETSC_SUCCESS); 5155 } 5156 5157 /*@ 5158 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5159 5160 Not Collective 5161 5162 Input Parameters: 5163 + A - the matrix 5164 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5165 5166 Output Parameter: 5167 . A_loc - the local sequential matrix generated 5168 5169 Level: developer 5170 5171 Notes: 5172 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5173 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5174 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5175 5176 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5177 5178 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5179 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5180 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5181 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
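   For example, a typical create-then-refresh sequence is sketched below (`A` is assumed to be a `MATMPIAIJ` matrix whose
   numerical values change between the two calls)
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc); /* creates A_loc from the current values of A */
     /* ... the values of A change ... */
     MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);   /* refreshes the values stored in A_loc */
     MatDestroy(&A_loc);
.ve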
5182 5183 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5184 @*/ 5185 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5186 { 5187 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5188 Mat_SeqAIJ *mat, *a, *b; 5189 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5190 const PetscScalar *aa, *ba, *aav, *bav; 5191 PetscScalar *ca, *cam; 5192 PetscMPIInt size; 5193 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5194 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5195 PetscBool match; 5196 5197 PetscFunctionBegin; 5198 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5199 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5200 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5201 if (size == 1) { 5202 if (scall == MAT_INITIAL_MATRIX) { 5203 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5204 *A_loc = mpimat->A; 5205 } else if (scall == MAT_REUSE_MATRIX) { 5206 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5207 } 5208 PetscFunctionReturn(PETSC_SUCCESS); 5209 } 5210 5211 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5212 a = (Mat_SeqAIJ *)mpimat->A->data; 5213 b = (Mat_SeqAIJ *)mpimat->B->data; 5214 ai = a->i; 5215 aj = a->j; 5216 bi = b->i; 5217 bj = b->j; 5218 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5219 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5220 aa = aav; 5221 ba = bav; 5222 if (scall == MAT_INITIAL_MATRIX) { 5223 PetscCall(PetscMalloc1(1 + am, &ci)); 5224 ci[0] = 0; 5225 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5226 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5227 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5228 k = 0; 5229 for (i = 0; i < am; i++) { 5230 ncols_o = bi[i + 1] - bi[i]; 5231 ncols_d = ai[i + 1] - ai[i]; 5232 /* off-diagonal portion of A */ 5233 for (jo = 0; jo < ncols_o; jo++) { 5234 col = cmap[*bj]; 5235 if (col >= cstart) break; 5236 cj[k] = col; 5237 bj++; 5238 ca[k++] = *ba++; 5239 } 5240 /* diagonal portion of A */ 5241 for (j = 0; j < ncols_d; j++) { 5242 cj[k] = cstart + *aj++; 5243 ca[k++] = *aa++; 5244 } 5245 /* off-diagonal portion of A */ 5246 for (j = jo; j < ncols_o; j++) { 5247 cj[k] = cmap[*bj++]; 5248 ca[k++] = *ba++; 5249 } 5250 } 5251 /* put together the new matrix */ 5252 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5253 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5254 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5255 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5256 mat->free_a = PETSC_TRUE; 5257 mat->free_ij = PETSC_TRUE; 5258 mat->nonew = 0; 5259 } else if (scall == MAT_REUSE_MATRIX) { 5260 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5261 ci = mat->i; 5262 cj = mat->j; 5263 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5264 for (i = 0; i < am; i++) { 5265 /* off-diagonal portion of A */ 5266 ncols_o = bi[i + 1] - bi[i]; 5267 for (jo = 0; jo < ncols_o; jo++) { 5268 col = cmap[*bj]; 5269 if (col >= cstart) break; 5270 *cam++ = *ba++; 5271 bj++; 5272 } 5273 /* diagonal portion of A */ 5274 ncols_d = ai[i + 1] - ai[i]; 5275 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5276 /* off-diagonal portion of A */ 5277 for (j = jo; j < ncols_o; j++) { 5278 *cam++ = *ba++; 5279 bj++; 5280 } 5281 } 5282 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5283 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5284 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5285 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5286 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5287 PetscFunctionReturn(PETSC_SUCCESS); 5288 } 5289 5290 /*@ 5291 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5292 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5293 5294 Not Collective 5295 5296 Input Parameters: 5297 + A - the matrix 5298 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5299 5300 Output Parameters: 5301 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5302 - A_loc - the local sequential matrix generated 5303 5304 Level: developer 5305 5306 Note: 5307 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5308 part, then those associated with the off-diagonal part (in its local ordering) 5309 5310 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5311 @*/ 5312 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5313 { 5314 Mat Ao, Ad; 5315 const PetscInt *cmap; 5316 PetscMPIInt size; 5317 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5318 5319 PetscFunctionBegin; 5320 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5321 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5322 if (size == 1) { 5323 if (scall == MAT_INITIAL_MATRIX) { 5324 PetscCall(PetscObjectReference((PetscObject)Ad)); 5325 *A_loc = Ad; 5326 } else if (scall == MAT_REUSE_MATRIX) { 5327 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5328 } 5329 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5330 PetscFunctionReturn(PETSC_SUCCESS); 5331 } 5332 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5333 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5334 if (f) { 5335 PetscCall((*f)(A, scall, glob, A_loc)); 5336 } else { 5337 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5338 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5339 Mat_SeqAIJ *c; 5340 PetscInt *ai = a->i, *aj = a->j; 5341 PetscInt *bi = b->i, *bj = b->j; 5342 PetscInt *ci, *cj; 5343 const PetscScalar *aa, *ba; 5344 PetscScalar *ca; 5345 PetscInt i, j, am, dn, on; 5346 5347 
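/* Default path: for each local row, concatenate the entries of the diagonal block (mapped to columns 0..dn-1) with those of the off-diagonal block (mapped to columns dn..dn+on-1, kept in their local ordering) into one sequential matrix */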
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5348 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5349 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5350 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5351 if (scall == MAT_INITIAL_MATRIX) { 5352 PetscInt k; 5353 PetscCall(PetscMalloc1(1 + am, &ci)); 5354 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5355 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5356 ci[0] = 0; 5357 for (i = 0, k = 0; i < am; i++) { 5358 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5359 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5360 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5361 /* diagonal portion of A */ 5362 for (j = 0; j < ncols_d; j++, k++) { 5363 cj[k] = *aj++; 5364 ca[k] = *aa++; 5365 } 5366 /* off-diagonal portion of A */ 5367 for (j = 0; j < ncols_o; j++, k++) { 5368 cj[k] = dn + *bj++; 5369 ca[k] = *ba++; 5370 } 5371 } 5372 /* put together the new matrix */ 5373 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5374 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5375 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5376 c = (Mat_SeqAIJ *)(*A_loc)->data; 5377 c->free_a = PETSC_TRUE; 5378 c->free_ij = PETSC_TRUE; 5379 c->nonew = 0; 5380 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5381 } else if (scall == MAT_REUSE_MATRIX) { 5382 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5383 for (i = 0; i < am; i++) { 5384 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5385 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5386 /* diagonal portion of A */ 5387 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5388 /* off-diagonal portion of A */ 5389 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5390 } 5391 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5392 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5393 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5394 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba)); 5395 if (glob) { 5396 PetscInt cst, *gidx; 5397 5398 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5399 PetscCall(PetscMalloc1(dn + on, &gidx)); 5400 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5401 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5402 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5403 } 5404 } 5405 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5406 PetscFunctionReturn(PETSC_SUCCESS); 5407 } 5408 5409 /*@C 5410 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5411 5412 Not Collective 5413 5414 Input Parameters: 5415 + A - the matrix 5416 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5417 . row - index set of rows to extract (or `NULL`) 5418 - col - index set of columns to extract (or `NULL`) 5419 5420 Output Parameter: 5421 . 
A_loc - the local sequential matrix generated 5422 5423 Level: developer 5424 5425 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5426 @*/ 5427 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5428 { 5429 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5430 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5431 IS isrowa, iscola; 5432 Mat *aloc; 5433 PetscBool match; 5434 5435 PetscFunctionBegin; 5436 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5437 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5438 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5439 if (!row) { 5440 start = A->rmap->rstart; 5441 end = A->rmap->rend; 5442 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5443 } else { 5444 isrowa = *row; 5445 } 5446 if (!col) { 5447 start = A->cmap->rstart; 5448 cmap = a->garray; 5449 nzA = a->A->cmap->n; 5450 nzB = a->B->cmap->n; 5451 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5452 ncols = 0; 5453 for (i = 0; i < nzB; i++) { 5454 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5455 else break; 5456 } 5457 imark = i; 5458 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5459 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5460 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5461 } else { 5462 iscola = *col; 5463 } 5464 if (scall != MAT_INITIAL_MATRIX) { 5465 PetscCall(PetscMalloc1(1, &aloc)); 5466 aloc[0] = *A_loc; 5467 } 5468 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5469 if (!col) { /* attach global id of condensed columns */ 5470 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5471 } 5472 *A_loc = aloc[0]; 5473 PetscCall(PetscFree(aloc)); 5474 if (!row) PetscCall(ISDestroy(&isrowa)); 5475 if (!col) PetscCall(ISDestroy(&iscola)); 5476 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5477 PetscFunctionReturn(PETSC_SUCCESS); 5478 } 5479 5480 /* 5481 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5482 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5483 * on a global size. 
5484 * */ 5485 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5486 { 5487 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5488 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5489 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5490 PetscMPIInt owner; 5491 PetscSFNode *iremote, *oiremote; 5492 const PetscInt *lrowindices; 5493 PetscSF sf, osf; 5494 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5495 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5496 MPI_Comm comm; 5497 ISLocalToGlobalMapping mapping; 5498 const PetscScalar *pd_a, *po_a; 5499 5500 PetscFunctionBegin; 5501 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5502 /* plocalsize is the number of roots 5503 * nrows is the number of leaves 5504 * */ 5505 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5506 PetscCall(ISGetLocalSize(rows, &nrows)); 5507 PetscCall(PetscCalloc1(nrows, &iremote)); 5508 PetscCall(ISGetIndices(rows, &lrowindices)); 5509 for (i = 0; i < nrows; i++) { 5510 /* Find a remote index and an owner for a row 5511 * The row could be local or remote 5512 * */ 5513 owner = 0; 5514 lidx = 0; 5515 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5516 iremote[i].index = lidx; 5517 iremote[i].rank = owner; 5518 } 5519 /* Create SF to communicate how many nonzero columns for each row */ 5520 PetscCall(PetscSFCreate(comm, &sf)); 5521 /* SF will figure out the number of nonzero columns for each row, and their 5522 * offsets 5523 * */ 5524 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5525 PetscCall(PetscSFSetFromOptions(sf)); 5526 PetscCall(PetscSFSetUp(sf)); 5527 5528 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5529 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5530 PetscCall(PetscCalloc1(nrows, &pnnz)); 5531 roffsets[0] = 0; 5532 roffsets[1] = 0; 5533 for (i = 0; i < plocalsize; i++) { 5534 /* diagonal */ 5535 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5536 /* off-diagonal */ 5537 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5538 /* compute offsets so that we relative location for each row */ 5539 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5540 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5541 } 5542 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5543 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5544 /* 'r' means root, and 'l' means leaf */ 5545 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5546 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5547 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5548 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5549 PetscCall(PetscSFDestroy(&sf)); 5550 PetscCall(PetscFree(roffsets)); 5551 PetscCall(PetscFree(nrcols)); 5552 dntotalcols = 0; 5553 ontotalcols = 0; 5554 ncol = 0; 5555 for (i = 0; i < nrows; i++) { 5556 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5557 ncol = PetscMax(pnnz[i], ncol); 5558 /* diagonal */ 5559 dntotalcols += nlcols[i * 2 + 0]; 5560 /* off-diagonal */ 5561 ontotalcols += nlcols[i * 2 + 1]; 5562 } 5563 /* We do not need to figure the right number of columns 5564 * since all the calculations will be done by going through the raw data 5565 * */ 5566 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5567 PetscCall(MatSetUp(*P_oth)); 5568 
PetscCall(PetscFree(pnnz)); 5569 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5570 /* diagonal */ 5571 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5572 /* off-diagonal */ 5573 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5574 /* diagonal */ 5575 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5576 /* off-diagonal */ 5577 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5578 dntotalcols = 0; 5579 ontotalcols = 0; 5580 ntotalcols = 0; 5581 for (i = 0; i < nrows; i++) { 5582 owner = 0; 5583 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5584 /* Set iremote for diag matrix */ 5585 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5586 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5587 iremote[dntotalcols].rank = owner; 5588 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5589 ilocal[dntotalcols++] = ntotalcols++; 5590 } 5591 /* off-diagonal */ 5592 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5593 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5594 oiremote[ontotalcols].rank = owner; 5595 oilocal[ontotalcols++] = ntotalcols++; 5596 } 5597 } 5598 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5599 PetscCall(PetscFree(loffsets)); 5600 PetscCall(PetscFree(nlcols)); 5601 PetscCall(PetscSFCreate(comm, &sf)); 5602 /* P serves as roots and P_oth is leaves 5603 * Diag matrix 5604 * */ 5605 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5606 PetscCall(PetscSFSetFromOptions(sf)); 5607 PetscCall(PetscSFSetUp(sf)); 5608 5609 PetscCall(PetscSFCreate(comm, &osf)); 5610 /* off-diagonal */ 5611 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5612 PetscCall(PetscSFSetFromOptions(osf)); 5613 PetscCall(PetscSFSetUp(osf)); 5614 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5615 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5616 /* operate on the matrix internal data to save memory */ 5617 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5618 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5619 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5620 /* Convert to global indices for diag matrix */ 5621 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5622 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5623 /* We want P_oth store global indices */ 5624 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5625 /* Use memory scalable approach */ 5626 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5627 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5628 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5629 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5630 /* Convert back to local indices */ 5631 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5632 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5633 nout = 0; 5634 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5635 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5636 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5637 /* Exchange values */ 5638 PetscCall(PetscSFBcastEnd(sf, 
MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5639 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5640 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5641 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5642 /* Stop PETSc from shrinking memory */ 5643 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5644 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5645 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5646 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5647 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5648 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5649 PetscCall(PetscSFDestroy(&sf)); 5650 PetscCall(PetscSFDestroy(&osf)); 5651 PetscFunctionReturn(PETSC_SUCCESS); 5652 } 5653 5654 /* 5655 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5656 * This supports MPIAIJ and MAIJ 5657 * */ 5658 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5659 { 5660 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5661 Mat_SeqAIJ *p_oth; 5662 IS rows, map; 5663 PetscHMapI hamp; 5664 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5665 MPI_Comm comm; 5666 PetscSF sf, osf; 5667 PetscBool has; 5668 5669 PetscFunctionBegin; 5670 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5671 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5672 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5673 * and then create a submatrix (that often is an overlapping matrix) 5674 * */ 5675 if (reuse == MAT_INITIAL_MATRIX) { 5676 /* Use a hash table to figure out unique keys */ 5677 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5678 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5679 count = 0; 5680 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5681 for (i = 0; i < a->B->cmap->n; i++) { 5682 key = a->garray[i] / dof; 5683 PetscCall(PetscHMapIHas(hamp, key, &has)); 5684 if (!has) { 5685 mapping[i] = count; 5686 PetscCall(PetscHMapISet(hamp, key, count++)); 5687 } else { 5688 /* Current 'i' has the same value the previous step */ 5689 mapping[i] = count - 1; 5690 } 5691 } 5692 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5693 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5694 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5695 PetscCall(PetscCalloc1(htsize, &rowindices)); 5696 off = 0; 5697 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5698 PetscCall(PetscHMapIDestroy(&hamp)); 5699 PetscCall(PetscSortInt(htsize, rowindices)); 5700 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5701 /* In case, the matrix was already created but users want to recreate the matrix */ 5702 PetscCall(MatDestroy(P_oth)); 5703 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5704 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5705 PetscCall(ISDestroy(&map)); 5706 PetscCall(ISDestroy(&rows)); 5707 } else if (reuse == MAT_REUSE_MATRIX) { 5708 /* If matrix was already created, we simply update values using SF objects 5709 * that as attached to the matrix earlier. 
5710 */ 5711 const PetscScalar *pd_a, *po_a; 5712 5713 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5714 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5715 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5716 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5717 /* Update values in place */ 5718 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5719 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5720 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5721 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5722 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5723 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5724 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5725 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5726 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5727 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5728 PetscFunctionReturn(PETSC_SUCCESS); 5729 } 5730 5731 /*@C 5732 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5733 5734 Collective 5735 5736 Input Parameters: 5737 + A - the first matrix in `MATMPIAIJ` format 5738 . B - the second matrix in `MATMPIAIJ` format 5739 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5740 5741 Output Parameters: 5742 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5743 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5744 - B_seq - the sequential matrix generated 5745 5746 Level: developer 5747 5748 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5749 @*/ 5750 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5751 { 5752 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5753 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5754 IS isrowb, iscolb; 5755 Mat *bseq = NULL; 5756 5757 PetscFunctionBegin; 5758 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5759 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5760 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5761 5762 if (scall == MAT_INITIAL_MATRIX) { 5763 start = A->cmap->rstart; 5764 cmap = a->garray; 5765 nzA = a->A->cmap->n; 5766 nzB = a->B->cmap->n; 5767 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5768 ncols = 0; 5769 for (i = 0; i < nzB; i++) { /* row < local row index */ 5770 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5771 else break; 5772 } 5773 imark = i; 5774 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5775 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5776 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5777 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5778 } else { 5779 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5780 isrowb = *rowb; 5781 iscolb = *colb; 5782 PetscCall(PetscMalloc1(1, &bseq)); 5783 bseq[0] = *B_seq; 5784 } 5785 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5786 *B_seq = bseq[0]; 5787 PetscCall(PetscFree(bseq)); 
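/* hand the index sets back to the caller if they were requested, otherwise destroy the ones created above */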
5788 if (!rowb) { 5789 PetscCall(ISDestroy(&isrowb)); 5790 } else { 5791 *rowb = isrowb; 5792 } 5793 if (!colb) { 5794 PetscCall(ISDestroy(&iscolb)); 5795 } else { 5796 *colb = iscolb; 5797 } 5798 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5799 PetscFunctionReturn(PETSC_SUCCESS); 5800 } 5801 5802 /* 5803 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5804 of the OFF-DIAGONAL portion of local A 5805 5806 Collective 5807 5808 Input Parameters: 5809 + A,B - the matrices in `MATMPIAIJ` format 5810 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5811 5812 Output Parameter: 5813 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5814 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5815 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5816 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5817 5818 Developer Note: 5819 This directly accesses information inside the VecScatter associated with the matrix-vector product 5820 for this matrix. This is not desirable.. 5821 5822 Level: developer 5823 5824 */ 5825 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5826 { 5827 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5828 Mat_SeqAIJ *b_oth; 5829 VecScatter ctx; 5830 MPI_Comm comm; 5831 const PetscMPIInt *rprocs, *sprocs; 5832 const PetscInt *srow, *rstarts, *sstarts; 5833 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5834 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5835 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5836 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5837 PetscMPIInt size, tag, rank, nreqs; 5838 5839 PetscFunctionBegin; 5840 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5841 PetscCallMPI(MPI_Comm_size(comm, &size)); 5842 5843 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5844 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5845 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5846 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5847 5848 if (size == 1) { 5849 startsj_s = NULL; 5850 bufa_ptr = NULL; 5851 *B_oth = NULL; 5852 PetscFunctionReturn(PETSC_SUCCESS); 5853 } 5854 5855 ctx = a->Mvctx; 5856 tag = ((PetscObject)ctx)->tag; 5857 5858 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5859 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5860 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5861 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5862 PetscCall(PetscMalloc1(nreqs, &reqs)); 5863 rwaits = reqs; 5864 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5865 5866 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5867 if (scall == MAT_INITIAL_MATRIX) { 5868 /* i-array */ 5869 /* post receives */ 5870 if (nrecvs) PetscCall(PetscMalloc1(rbs * 
(rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5871 for (i = 0; i < nrecvs; i++) { 5872 rowlen = rvalues + rstarts[i] * rbs; 5873 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5874 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5875 } 5876 5877 /* pack the outgoing message */ 5878 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5879 5880 sstartsj[0] = 0; 5881 rstartsj[0] = 0; 5882 len = 0; /* total length of j or a array to be sent */ 5883 if (nsends) { 5884 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5885 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5886 } 5887 for (i = 0; i < nsends; i++) { 5888 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5889 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5890 for (j = 0; j < nrows; j++) { 5891 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5892 for (l = 0; l < sbs; l++) { 5893 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5894 5895 rowlen[j * sbs + l] = ncols; 5896 5897 len += ncols; 5898 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5899 } 5900 k++; 5901 } 5902 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5903 5904 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5905 } 5906 /* recvs and sends of i-array are completed */ 5907 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5908 PetscCall(PetscFree(svalues)); 5909 5910 /* allocate buffers for sending j and a arrays */ 5911 PetscCall(PetscMalloc1(len + 1, &bufj)); 5912 PetscCall(PetscMalloc1(len + 1, &bufa)); 5913 5914 /* create i-array of B_oth */ 5915 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5916 5917 b_othi[0] = 0; 5918 len = 0; /* total length of j or a array to be received */ 5919 k = 0; 5920 for (i = 0; i < nrecvs; i++) { 5921 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5922 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5923 for (j = 0; j < nrows; j++) { 5924 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5925 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5926 k++; 5927 } 5928 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5929 } 5930 PetscCall(PetscFree(rvalues)); 5931 5932 /* allocate space for j and a arrays of B_oth */ 5933 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5934 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5935 5936 /* j-array */ 5937 /* post receives of j-array */ 5938 for (i = 0; i < nrecvs; i++) { 5939 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5940 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5941 } 5942 5943 /* pack the outgoing message j-array */ 5944 if (nsends) k = sstarts[0]; 5945 for (i = 0; i < nsends; i++) { 5946 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5947 bufJ = bufj + sstartsj[i]; 5948 for (j = 0; j < nrows; j++) { 5949 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5950 for (ll = 0; ll < sbs; ll++) { 5951 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5952 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5953 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5954 } 5955 } 5956 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], 
MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5957 } 5958 5959 /* recvs and sends of j-array are completed */ 5960 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5961 } else if (scall == MAT_REUSE_MATRIX) { 5962 sstartsj = *startsj_s; 5963 rstartsj = *startsj_r; 5964 bufa = *bufa_ptr; 5965 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5966 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5967 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5968 5969 /* a-array */ 5970 /* post receives of a-array */ 5971 for (i = 0; i < nrecvs; i++) { 5972 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5973 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5974 } 5975 5976 /* pack the outgoing message a-array */ 5977 if (nsends) k = sstarts[0]; 5978 for (i = 0; i < nsends; i++) { 5979 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5980 bufA = bufa + sstartsj[i]; 5981 for (j = 0; j < nrows; j++) { 5982 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5983 for (ll = 0; ll < sbs; ll++) { 5984 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5985 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5986 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5987 } 5988 } 5989 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5990 } 5991 /* recvs and sends of a-array are completed */ 5992 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5993 PetscCall(PetscFree(reqs)); 5994 5995 if (scall == MAT_INITIAL_MATRIX) { 5996 /* put together the new matrix */ 5997 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5998 5999 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6000 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6001 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6002 b_oth->free_a = PETSC_TRUE; 6003 b_oth->free_ij = PETSC_TRUE; 6004 b_oth->nonew = 0; 6005 6006 PetscCall(PetscFree(bufj)); 6007 if (!startsj_s || !bufa_ptr) { 6008 PetscCall(PetscFree2(sstartsj, rstartsj)); 6009 PetscCall(PetscFree(bufa_ptr)); 6010 } else { 6011 *startsj_s = sstartsj; 6012 *startsj_r = rstartsj; 6013 *bufa_ptr = bufa; 6014 } 6015 } else if (scall == MAT_REUSE_MATRIX) { 6016 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6017 } 6018 6019 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6020 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6021 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6022 PetscFunctionReturn(PETSC_SUCCESS); 6023 } 6024 6025 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6028 #if defined(PETSC_HAVE_MKL_SPARSE) 6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6030 #endif 6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6033 #if defined(PETSC_HAVE_ELEMENTAL) 6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6035 #endif 6036 #if defined(PETSC_HAVE_SCALAPACK) 6037 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6038 #endif 6039 #if defined(PETSC_HAVE_HYPRE) 6040 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6041 #endif 6042 #if defined(PETSC_HAVE_CUDA) 6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6044 #endif 6045 #if defined(PETSC_HAVE_HIP) 6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6047 #endif 6048 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6050 #endif 6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6052 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6053 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6054 6055 /* 6056 Computes (B'*A')' since computing B*A directly is untenable 6057 6058 n p p 6059 [ ] [ ] [ ] 6060 m [ A ] * n [ B ] = m [ C ] 6061 [ ] [ ] [ ] 6062 6063 */ 6064 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6065 { 6066 Mat At, Bt, Ct; 6067 6068 PetscFunctionBegin; 6069 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6070 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6071 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6072 PetscCall(MatDestroy(&At)); 6073 PetscCall(MatDestroy(&Bt)); 6074 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6075 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6076 PetscCall(MatDestroy(&Ct)); 6077 PetscFunctionReturn(PETSC_SUCCESS); 6078 } 6079 6080 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6081 { 6082 PetscBool cisdense; 6083 6084 PetscFunctionBegin; 6085 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6086 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6087 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6088 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6089 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6090 PetscCall(MatSetUp(C)); 6091 6092 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6093 PetscFunctionReturn(PETSC_SUCCESS); 6094 } 6095 6096 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6097 { 6098 Mat_Product *product = C->product; 6099 Mat A = product->A, B = product->B; 6100 6101 PetscFunctionBegin; 6102 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6103 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6104 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6105 C->ops->productsymbolic = MatProductSymbolic_AB; 6106 PetscFunctionReturn(PETSC_SUCCESS); 6107 } 6108 6109 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6110 { 6111 Mat_Product *product = C->product; 6112 6113 PetscFunctionBegin; 6114 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6115 PetscFunctionReturn(PETSC_SUCCESS); 6116 } 6117 6118 /* 6119 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6120 6121 Input Parameters: 6122 6123 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6124 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6125 6126 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6127 6128 For Set1, j1[] contains column indices of the nonzeros. 6129 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6130 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6131 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6132 6133 Similar for Set2. 6134 6135 This routine merges the two sets of nonzeros row by row and removes repeats. 6136 6137 Output Parameters: (memory is allocated by the caller) 6138 6139 i[],j[]: the CSR of the merged matrix, which has m rows. 6140 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6141 imap2[]: similar to imap1[], but for Set2. 6142 Note we order nonzeros row-by-row and from left to right. 
6143 */
6144 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6145 {
6146 PetscInt r, m; /* r: row index of mat; m: number of local rows */
6147 PetscCount t, t1, t2, b1, e1, b2, e2;
6148
6149 PetscFunctionBegin;
6150 PetscCall(MatGetLocalSize(mat, &m, NULL));
6151 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6152 i[0] = 0;
6153 for (r = 0; r < m; r++) { /* Do row by row merging */
6154 b1 = rowBegin1[r];
6155 e1 = rowEnd1[r];
6156 b2 = rowBegin2[r];
6157 e2 = rowEnd2[r];
6158 while (b1 < e1 && b2 < e2) {
6159 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6160 j[t] = j1[b1];
6161 imap1[t1] = t;
6162 imap2[t2] = t;
6163 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero in Set1 */
6164 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero in Set2 */
6165 t1++;
6166 t2++;
6167 t++;
6168 } else if (j1[b1] < j2[b2]) {
6169 j[t] = j1[b1];
6170 imap1[t1] = t;
6171 b1 += jmap1[t1 + 1] - jmap1[t1];
6172 t1++;
6173 t++;
6174 } else {
6175 j[t] = j2[b2];
6176 imap2[t2] = t;
6177 b2 += jmap2[t2 + 1] - jmap2[t2];
6178 t2++;
6179 t++;
6180 }
6181 }
6182 /* Merge the remaining in either j1[] or j2[] */
6183 while (b1 < e1) {
6184 j[t] = j1[b1];
6185 imap1[t1] = t;
6186 b1 += jmap1[t1 + 1] - jmap1[t1];
6187 t1++;
6188 t++;
6189 }
6190 while (b2 < e2) {
6191 j[t] = j2[b2];
6192 imap2[t2] = t;
6193 b2 += jmap2[t2 + 1] - jmap2[t2];
6194 t2++;
6195 t++;
6196 }
6197 i[r + 1] = t;
6198 }
6199 PetscFunctionReturn(PETSC_SUCCESS);
6200 }
6201
6202 /*
6203 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6204
6205 Input Parameters:
6206 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6207 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6208 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6209
6210 i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6211 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6212
6213 Output Parameters:
6214 j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6215 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6216 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6217 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6218
6219 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6220 Atot: number of entries belonging to the diagonal block.
6221 Annz: number of unique nonzeros belonging to the diagonal block.
6222 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6223 repeats (i.e., same 'i,j' pair).
6224 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t] 6225 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6226 6227 Atot: number of entries belonging to the diagonal block 6228 Annz: number of unique nonzeros belonging to the diagonal block. 6229 6230 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6231 6232 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6233 */ 6234 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6235 { 6236 PetscInt cstart, cend, rstart, rend, row, col; 6237 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6238 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6239 PetscCount k, m, p, q, r, s, mid; 6240 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6241 6242 PetscFunctionBegin; 6243 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6244 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6245 m = rend - rstart; 6246 6247 /* Skip negative rows */ 6248 for (k = 0; k < n; k++) 6249 if (i[k] >= 0) break; 6250 6251 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6252 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6253 */ 6254 while (k < n) { 6255 row = i[k]; 6256 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6257 for (s = k; s < n; s++) 6258 if (i[s] != row) break; 6259 6260 /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6261 for (p = k; p < s; p++) { 6262 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; 6263 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6264 } 6265 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6266 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6267 rowBegin[row - rstart] = k; 6268 rowMid[row - rstart] = mid; 6269 rowEnd[row - rstart] = s; 6270 6271 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6272 Atot += mid - k; 6273 Btot += s - mid; 6274 6275 /* Count unique nonzeros of this diag row */ 6276 for (p = k; p < mid;) { 6277 col = j[p]; 6278 do { 6279 j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */ 6280 p++; 6281 } while (p < mid && j[p] == col); 6282 Annz++; 6283 } 6284 6285 /* Count unique nonzeros of this offdiag row */ 6286 for (p = mid; p < s;) { 6287 col = j[p]; 6288 do { 6289 p++; 6290 } while (p < s && j[p] == col); 6291 Bnnz++; 6292 } 6293 k = s; 6294 } 6295 6296 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6297 PetscCall(PetscMalloc1(Atot, &Aperm)); 6298 PetscCall(PetscMalloc1(Btot, &Bperm)); 6299 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6300 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6301 6302 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6303 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6304 for (r = 0; r < m; r++) { 6305 k = rowBegin[r]; 6306 mid 
= rowMid[r]; 6307 s = rowEnd[r]; 6308 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6309 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6310 Atot += mid - k; 6311 Btot += s - mid; 6312 6313 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6314 for (p = k; p < mid;) { 6315 col = j[p]; 6316 q = p; 6317 do { 6318 p++; 6319 } while (p < mid && j[p] == col); 6320 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6321 Annz++; 6322 } 6323 6324 for (p = mid; p < s;) { 6325 col = j[p]; 6326 q = p; 6327 do { 6328 p++; 6329 } while (p < s && j[p] == col); 6330 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6331 Bnnz++; 6332 } 6333 } 6334 /* Output */ 6335 *Aperm_ = Aperm; 6336 *Annz_ = Annz; 6337 *Atot_ = Atot; 6338 *Ajmap_ = Ajmap; 6339 *Bperm_ = Bperm; 6340 *Bnnz_ = Bnnz; 6341 *Btot_ = Btot; 6342 *Bjmap_ = Bjmap; 6343 PetscFunctionReturn(PETSC_SUCCESS); 6344 } 6345 6346 /* 6347 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6348 6349 Input Parameters: 6350 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6351 nnz: number of unique nonzeros in the merged matrix 6352 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6353 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6354 6355 Output Parameter: (memory is allocated by the caller) 6356 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6357 6358 Example: 6359 nnz1 = 4 6360 nnz = 6 6361 imap = [1,3,4,5] 6362 jmap = [0,3,5,6,7] 6363 then, 6364 jmap_new = [0,0,3,3,5,6,7] 6365 */ 6366 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6367 { 6368 PetscCount k, p; 6369 6370 PetscFunctionBegin; 6371 jmap_new[0] = 0; 6372 p = nnz; /* p loops over jmap_new[] backwards */ 6373 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6374 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6375 } 6376 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6377 PetscFunctionReturn(PETSC_SUCCESS); 6378 } 6379 6380 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6381 { 6382 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6383 6384 PetscFunctionBegin; 6385 PetscCall(PetscSFDestroy(&coo->sf)); 6386 PetscCall(PetscFree(coo->Aperm1)); 6387 PetscCall(PetscFree(coo->Bperm1)); 6388 PetscCall(PetscFree(coo->Ajmap1)); 6389 PetscCall(PetscFree(coo->Bjmap1)); 6390 PetscCall(PetscFree(coo->Aimap2)); 6391 PetscCall(PetscFree(coo->Bimap2)); 6392 PetscCall(PetscFree(coo->Aperm2)); 6393 PetscCall(PetscFree(coo->Bperm2)); 6394 PetscCall(PetscFree(coo->Ajmap2)); 6395 PetscCall(PetscFree(coo->Bjmap2)); 6396 PetscCall(PetscFree(coo->Cperm1)); 6397 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6398 PetscCall(PetscFree(coo)); 6399 PetscFunctionReturn(PETSC_SUCCESS); 6400 } 6401 6402 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6403 { 6404 MPI_Comm comm; 6405 PetscMPIInt rank, size; 6406 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6407 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6408 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6409 PetscContainer container; 6410 MatCOOStruct_MPIAIJ 
*coo; 6411 6412 PetscFunctionBegin; 6413 PetscCall(PetscFree(mpiaij->garray)); 6414 PetscCall(VecDestroy(&mpiaij->lvec)); 6415 #if defined(PETSC_USE_CTABLE) 6416 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6417 #else 6418 PetscCall(PetscFree(mpiaij->colmap)); 6419 #endif 6420 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6421 mat->assembled = PETSC_FALSE; 6422 mat->was_assembled = PETSC_FALSE; 6423 6424 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6425 PetscCallMPI(MPI_Comm_size(comm, &size)); 6426 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6427 PetscCall(PetscLayoutSetUp(mat->rmap)); 6428 PetscCall(PetscLayoutSetUp(mat->cmap)); 6429 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6430 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6431 PetscCall(MatGetLocalSize(mat, &m, &n)); 6432 PetscCall(MatGetSize(mat, &M, &N)); 6433 6434 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6435 /* entries come first, then local rows, then remote rows. */ 6436 PetscCount n1 = coo_n, *perm1; 6437 PetscInt *i1 = coo_i, *j1 = coo_j; 6438 6439 PetscCall(PetscMalloc1(n1, &perm1)); 6440 for (k = 0; k < n1; k++) perm1[k] = k; 6441 6442 /* Manipulate indices so that entries with negative row or col indices will have smallest 6443 row indices, local entries will have greater but negative row indices, and remote entries 6444 will have positive row indices. 6445 */ 6446 for (k = 0; k < n1; k++) { 6447 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6448 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6449 else { 6450 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6451 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6452 } 6453 } 6454 6455 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6456 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6457 6458 /* Advance k to the first entry we need to take care of */ 6459 for (k = 0; k < n1; k++) 6460 if (i1[k] > PETSC_MIN_INT) break; 6461 PetscInt i1start = k; 6462 6463 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6464 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6465 6466 /* Send remote rows to their owner */ 6467 /* Find which rows should be sent to which remote ranks*/ 6468 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6469 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6470 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6471 const PetscInt *ranges; 6472 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */
6473
6474 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6475 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6476 for (k = rem; k < n1;) {
6477 PetscMPIInt owner;
6478 PetscInt firstRow, lastRow;
6479
6480 /* Locate a row range */
6481 firstRow = i1[k]; /* first row of this owner */
6482 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6483 lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6484
6485 /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6486 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6487
6488 /* All entries in [k,p) belong to this remote owner */
6489 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6490 PetscMPIInt *sendto2;
6491 PetscInt *nentries2;
6492 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6493
6494 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6495 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6496 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6497 PetscCall(PetscFree2(sendto, nentries));
6498 sendto = sendto2;
6499 nentries = nentries2;
6500 maxNsend = maxNsend2;
6501 }
6502 sendto[nsend] = owner;
6503 nentries[nsend] = p - k;
6504 PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6505 nsend++;
6506 k = p;
6507 }
6508
6509 /* Build 1st SF to know offsets on remote to send data */
6510 PetscSF sf1;
6511 PetscInt nroots = 1, nroots2 = 0;
6512 PetscInt nleaves = nsend, nleaves2 = 0;
6513 PetscInt *offsets;
6514 PetscSFNode *iremote;
6515
6516 PetscCall(PetscSFCreate(comm, &sf1));
6517 PetscCall(PetscMalloc1(nsend, &iremote));
6518 PetscCall(PetscMalloc1(nsend, &offsets));
6519 for (k = 0; k < nsend; k++) {
6520 iremote[k].rank = sendto[k];
6521 iremote[k].index = 0;
6522 nleaves2 += nentries[k];
6523 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6524 }
6525 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6526 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6527 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we detect it via the offsets[] check below */
6528 PetscCall(PetscSFDestroy(&sf1));
6529 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6530
6531 /* Build 2nd SF to send remote COOs to their owner */
6532 PetscSF sf2;
6533 nroots = nroots2;
6534 nleaves = nleaves2;
6535 PetscCall(PetscSFCreate(comm, &sf2));
6536 PetscCall(PetscSFSetFromOptions(sf2));
6537 PetscCall(PetscMalloc1(nleaves, &iremote));
6538 p = 0;
6539 for (k = 0; k < nsend; k++) {
6540 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6541 for (q = 0; q < nentries[k]; q++, p++) {
6542 iremote[p].rank = sendto[k];
6543 iremote[p].index = offsets[k] + q;
6544 }
6545 }
6546 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6547
6548 /* Send the remote COOs to their owner */
6549 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6550
PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6551 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6552 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6553 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6554 PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */ 6555 PetscInt *j1prem = j1 ? j1 + rem : NULL; 6556 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6557 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6558 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6559 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6560 6561 PetscCall(PetscFree(offsets)); 6562 PetscCall(PetscFree2(sendto, nentries)); 6563 6564 /* Sort received COOs by row along with the permutation array */ 6565 for (k = 0; k < n2; k++) perm2[k] = k; 6566 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6567 6568 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6569 PetscCount *Cperm1; 6570 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6571 PetscCount *perm1prem = perm1 ? perm1 + rem : NULL; 6572 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6573 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6574 6575 /* Support for HYPRE matrices, kind of a hack. 6576 Swap min column with diagonal so that diagonal values will go first */ 6577 PetscBool hypre; 6578 const char *name; 6579 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6580 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6581 if (hypre) { 6582 PetscInt *minj; 6583 PetscBT hasdiag; 6584 6585 PetscCall(PetscBTCreate(m, &hasdiag)); 6586 PetscCall(PetscMalloc1(m, &minj)); 6587 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6588 for (k = i1start; k < rem; k++) { 6589 if (j1[k] < cstart || j1[k] >= cend) continue; 6590 const PetscInt rindex = i1[k] - rstart; 6591 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6592 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6593 } 6594 for (k = 0; k < n2; k++) { 6595 if (j2[k] < cstart || j2[k] >= cend) continue; 6596 const PetscInt rindex = i2[k] - rstart; 6597 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6598 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6599 } 6600 for (k = i1start; k < rem; k++) { 6601 const PetscInt rindex = i1[k] - rstart; 6602 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6603 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6604 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6605 } 6606 for (k = 0; k < n2; k++) { 6607 const PetscInt rindex = i2[k] - rstart; 6608 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6609 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6610 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6611 } 6612 PetscCall(PetscBTDestroy(&hasdiag)); 6613 PetscCall(PetscFree(minj)); 6614 } 6615 6616 /* Split local COOs and received COOs into diag/offdiag portions */ 6617 PetscCount 
*rowBegin1, *rowMid1, *rowEnd1; 6618 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6619 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6620 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6621 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6622 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6623 6624 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6625 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6626 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6627 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6628 6629 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6630 PetscInt *Ai, *Bi; 6631 PetscInt *Aj, *Bj; 6632 6633 PetscCall(PetscMalloc1(m + 1, &Ai)); 6634 PetscCall(PetscMalloc1(m + 1, &Bi)); 6635 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6636 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6637 6638 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6639 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6640 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6641 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6642 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6643 6644 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6645 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6646 6647 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6648 /* expect nonzeros in A/B most likely have local contributing entries */ 6649 PetscInt Annz = Ai[m]; 6650 PetscInt Bnnz = Bi[m]; 6651 PetscCount *Ajmap1_new, *Bjmap1_new; 6652 6653 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6654 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6655 6656 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6657 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6658 6659 PetscCall(PetscFree(Aimap1)); 6660 PetscCall(PetscFree(Ajmap1)); 6661 PetscCall(PetscFree(Bimap1)); 6662 PetscCall(PetscFree(Bjmap1)); 6663 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6664 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6665 PetscCall(PetscFree(perm1)); 6666 PetscCall(PetscFree3(i2, j2, perm2)); 6667 6668 Ajmap1 = Ajmap1_new; 6669 Bjmap1 = Bjmap1_new; 6670 6671 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6672 if (Annz < Annz1 + Annz2) { 6673 PetscInt *Aj_new; 6674 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6675 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6676 PetscCall(PetscFree(Aj)); 6677 Aj = Aj_new; 6678 } 6679 6680 if (Bnnz < Bnnz1 + Bnnz2) { 6681 PetscInt *Bj_new; 6682 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6683 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6684 PetscCall(PetscFree(Bj)); 6685 Bj = Bj_new; 6686 } 6687 6688 /* Create new submatrices for on-process and off-process coupling */ 6689 PetscScalar *Aa, *Ba; 6690 MatType rtype; 6691 Mat_SeqAIJ *a, *b; 6692 PetscObjectState state; 6693 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6694 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6695 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6696 if (cstart) { 6697 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6698 } 
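/* Illustration (hypothetical numbers): if this rank owns columns [cstart,cend) = [100,200), a diagonal-block entry with
   global column 103 is stored in Aj[] as local column 3, which is the form MatCreateSeqAIJWithArrays() below expects for mpiaij->A */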
6699 6700 PetscCall(MatGetRootType_Private(mat, &rtype)); 6701 6702 MatSeqXAIJGetOptions_Private(mpiaij->A); 6703 PetscCall(MatDestroy(&mpiaij->A)); 6704 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6705 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6706 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6707 6708 MatSeqXAIJGetOptions_Private(mpiaij->B); 6709 PetscCall(MatDestroy(&mpiaij->B)); 6710 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6711 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6712 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6713 6714 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6715 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6716 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6717 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6718 6719 a = (Mat_SeqAIJ *)mpiaij->A->data; 6720 b = (Mat_SeqAIJ *)mpiaij->B->data; 6721 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6722 a->free_a = b->free_a = PETSC_TRUE; 6723 a->free_ij = b->free_ij = PETSC_TRUE; 6724 6725 /* conversion must happen AFTER multiply setup */ 6726 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6727 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6728 PetscCall(VecDestroy(&mpiaij->lvec)); 6729 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6730 6731 // Put the COO struct in a container and then attach that to the matrix 6732 PetscCall(PetscMalloc1(1, &coo)); 6733 coo->n = coo_n; 6734 coo->sf = sf2; 6735 coo->sendlen = nleaves; 6736 coo->recvlen = nroots; 6737 coo->Annz = Annz; 6738 coo->Bnnz = Bnnz; 6739 coo->Annz2 = Annz2; 6740 coo->Bnnz2 = Bnnz2; 6741 coo->Atot1 = Atot1; 6742 coo->Atot2 = Atot2; 6743 coo->Btot1 = Btot1; 6744 coo->Btot2 = Btot2; 6745 coo->Ajmap1 = Ajmap1; 6746 coo->Aperm1 = Aperm1; 6747 coo->Bjmap1 = Bjmap1; 6748 coo->Bperm1 = Bperm1; 6749 coo->Aimap2 = Aimap2; 6750 coo->Ajmap2 = Ajmap2; 6751 coo->Aperm2 = Aperm2; 6752 coo->Bimap2 = Bimap2; 6753 coo->Bjmap2 = Bjmap2; 6754 coo->Bperm2 = Bperm2; 6755 coo->Cperm1 = Cperm1; 6756 // Allocate in preallocation. 
If not used, it has zero cost on host 6757 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6758 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6759 PetscCall(PetscContainerSetPointer(container, coo)); 6760 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6761 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6762 PetscCall(PetscContainerDestroy(&container)); 6763 PetscFunctionReturn(PETSC_SUCCESS); 6764 } 6765 6766 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6767 { 6768 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6769 Mat A = mpiaij->A, B = mpiaij->B; 6770 PetscScalar *Aa, *Ba; 6771 PetscScalar *sendbuf, *recvbuf; 6772 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6773 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6774 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6775 const PetscCount *Cperm1; 6776 PetscContainer container; 6777 MatCOOStruct_MPIAIJ *coo; 6778 6779 PetscFunctionBegin; 6780 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6781 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6782 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6783 sendbuf = coo->sendbuf; 6784 recvbuf = coo->recvbuf; 6785 Ajmap1 = coo->Ajmap1; 6786 Ajmap2 = coo->Ajmap2; 6787 Aimap2 = coo->Aimap2; 6788 Bjmap1 = coo->Bjmap1; 6789 Bjmap2 = coo->Bjmap2; 6790 Bimap2 = coo->Bimap2; 6791 Aperm1 = coo->Aperm1; 6792 Aperm2 = coo->Aperm2; 6793 Bperm1 = coo->Bperm1; 6794 Bperm2 = coo->Bperm2; 6795 Cperm1 = coo->Cperm1; 6796 6797 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6798 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6799 6800 /* Pack entries to be sent to remote */ 6801 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6802 6803 /* Send remote entries to their owner and overlap the communication with local computation */ 6804 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6805 /* Add local entries to A and B */ 6806 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6807 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6808 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6809 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6810 } 6811 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6812 PetscScalar sum = 0.0; 6813 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6814 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6815 } 6816 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6817 6818 /* Add received remote entries to A and B */ 6819 for (PetscCount i = 0; i < coo->Annz2; i++) { 6820 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6821 } 6822 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6823 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6824 } 6825 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6826 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6827 PetscFunctionReturn(PETSC_SUCCESS); 6828 } 6829 6830 /*MC 6831 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6832 6833 Options Database Keys: 6834 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6835 6836 Level: beginner 6837 6838 Notes: 6839 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6840 in this case the values associated with the rows and columns one passes in are set to zero 6841 in the matrix 6842 6843 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6844 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6845 6846 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6847 M*/ 6848 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6849 { 6850 Mat_MPIAIJ *b; 6851 PetscMPIInt size; 6852 6853 PetscFunctionBegin; 6854 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6855 6856 PetscCall(PetscNew(&b)); 6857 B->data = (void *)b; 6858 B->ops[0] = MatOps_Values; 6859 B->assembled = PETSC_FALSE; 6860 B->insertmode = NOT_SET_VALUES; 6861 b->size = size; 6862 6863 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6864 6865 /* build cache for off array entries formed */ 6866 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6867 6868 b->donotstash = PETSC_FALSE; 6869 b->colmap = NULL; 6870 b->garray = NULL; 6871 b->roworiented = PETSC_TRUE; 6872 6873 /* stuff used for matrix vector multiply */ 6874 b->lvec = NULL; 6875 b->Mvctx = NULL; 6876 6877 /* stuff for MatGetRow() */ 6878 b->rowindices = NULL; 6879 b->rowvalues = NULL; 6880 b->getrowactive = PETSC_FALSE; 6881 6882 /* flexible pointer used in CUSPARSE classes */ 6883 b->spptr = NULL; 6884 6885 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6886 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6887 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6888 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6889 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6890 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6891 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6893 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", 
MatConvert_MPIAIJ_MPIAIJPERM)); 6894 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6895 #if defined(PETSC_HAVE_CUDA) 6896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6897 #endif 6898 #if defined(PETSC_HAVE_HIP) 6899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6900 #endif 6901 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6903 #endif 6904 #if defined(PETSC_HAVE_MKL_SPARSE) 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6906 #endif 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6911 #if defined(PETSC_HAVE_ELEMENTAL) 6912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6913 #endif 6914 #if defined(PETSC_HAVE_SCALAPACK) 6915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6916 #endif 6917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6919 #if defined(PETSC_HAVE_HYPRE) 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6922 #endif 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6927 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6928 PetscFunctionReturn(PETSC_SUCCESS); 6929 } 6930 6931 /*@C 6932 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6933 and "off-diagonal" part of the matrix in CSR format. 6934 6935 Collective 6936 6937 Input Parameters: 6938 + comm - MPI communicator 6939 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6940 . n - This value should be the same as the local size used in creating the 6941 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6942 calculated if `N` is given) For square matrices `n` is almost always `m`. 6943 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6944 . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6945 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6946 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6947 . a - matrix values 6948 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6949 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6950 - oa - matrix values 6951 6952 Output Parameter: 6953 . mat - the matrix 6954 6955 Level: advanced 6956 6957 Notes: 6958 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6959 must free the arrays once the matrix has been destroyed and not before. 6960 6961 The `i` and `j` indices are 0 based 6962 6963 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6964 6965 This sets local rows and cannot be used to set off-processor values. 6966 6967 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6968 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6969 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6970 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6971 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6972 communication if it is known that only local entries will be set. 
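
   Example Usage:
   For illustration only (hypothetical data; `comm` is assumed to be a communicator with two ranks and `A` an uninitialized `Mat`), a
   4 x 4 matrix whose first two rows are [1 2 0 3] and [0 4 5 0] could be assembled on rank 0, which owns rows and columns 0 and 1, as
.vb
  PetscInt    i[]  = {0, 2, 3}, j[]  = {0, 1, 1}; // "diagonal" CSR, local column indices
  PetscScalar a[]  = {1.0, 2.0, 4.0};
  PetscInt    oi[] = {0, 1, 2}, oj[] = {3, 2};    // "off-diagonal" CSR, global column indices
  PetscScalar oa[] = {3.0, 5.0};

  PetscCall(MatCreateMPIAIJWithSplitArrays(comm, 2, 2, 4, 4, i, j, a, oi, oj, oa, &A));
.ve
   Rank 1 passes its own six arrays in the same form, and all arrays must remain valid until the matrix is destroyed.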
6973 6974 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6975 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6976 @*/ 6977 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6978 { 6979 Mat_MPIAIJ *maij; 6980 6981 PetscFunctionBegin; 6982 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6983 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6984 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6985 PetscCall(MatCreate(comm, mat)); 6986 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6987 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6988 maij = (Mat_MPIAIJ *)(*mat)->data; 6989 6990 (*mat)->preallocated = PETSC_TRUE; 6991 6992 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6993 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6994 6995 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6996 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6997 6998 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6999 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7000 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7001 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7002 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7003 PetscFunctionReturn(PETSC_SUCCESS); 7004 } 7005 7006 typedef struct { 7007 Mat *mp; /* intermediate products */ 7008 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7009 PetscInt cp; /* number of intermediate products */ 7010 7011 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7012 PetscInt *startsj_s, *startsj_r; 7013 PetscScalar *bufa; 7014 Mat P_oth; 7015 7016 /* may take advantage of merging product->B */ 7017 Mat Bloc; /* B-local by merging diag and off-diag */ 7018 7019 /* cusparse does not have support to split between symbolic and numeric phases. 7020 When api_user is true, we don't need to update the numerical values 7021 of the temporary storage */ 7022 PetscBool reusesym; 7023 7024 /* support for COO values insertion */ 7025 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7026 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7027 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7028 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7029 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7030 PetscMemType mtype; 7031 7032 /* customization */ 7033 PetscBool abmerge; 7034 PetscBool P_oth_bind; 7035 } MatMatMPIAIJBACKEND; 7036 7037 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7038 { 7039 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7040 PetscInt i; 7041 7042 PetscFunctionBegin; 7043 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7044 PetscCall(PetscFree(mmdata->bufa)); 7045 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7046 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7047 PetscCall(MatDestroy(&mmdata->P_oth)); 7048 PetscCall(MatDestroy(&mmdata->Bloc)); 7049 PetscCall(PetscSFDestroy(&mmdata->sf)); 7050 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7051 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7052 PetscCall(PetscFree(mmdata->own[0])); 7053 PetscCall(PetscFree(mmdata->own)); 7054 PetscCall(PetscFree(mmdata->off[0])); 7055 PetscCall(PetscFree(mmdata->off)); 7056 PetscCall(PetscFree(mmdata)); 7057 PetscFunctionReturn(PETSC_SUCCESS); 7058 } 7059 7060 /* Copy selected n entries with indices in idx[] of A to v[]. 7061 If idx is NULL, copy the whole data array of A to v[] 7062 */ 7063 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7064 { 7065 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7066 7067 PetscFunctionBegin; 7068 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7069 if (f) { 7070 PetscCall((*f)(A, n, idx, v)); 7071 } else { 7072 const PetscScalar *vv; 7073 7074 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7075 if (n && idx) { 7076 PetscScalar *w = v; 7077 const PetscInt *oi = idx; 7078 PetscInt j; 7079 7080 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7081 } else { 7082 PetscCall(PetscArraycpy(v, vv, n)); 7083 } 7084 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7085 } 7086 PetscFunctionReturn(PETSC_SUCCESS); 7087 } 7088 7089 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7090 { 7091 MatMatMPIAIJBACKEND *mmdata; 7092 PetscInt i, n_d, n_o; 7093 7094 PetscFunctionBegin; 7095 MatCheckProduct(C, 1); 7096 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7097 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7098 if (!mmdata->reusesym) { /* update temporary matrices */ 7099 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7100 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7101 } 7102 mmdata->reusesym = PETSC_FALSE; 7103 7104 for (i = 0; i < mmdata->cp; i++) { 7105 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7106 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7107 } 7108 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7109 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7110 7111 if (mmdata->mptmp[i]) continue; 7112 if (noff) { 7113 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7114 7115 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7116 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, 
mmdata->own[i], mmdata->coo_v + n_d)); 7117 n_o += noff; 7118 n_d += nown; 7119 } else { 7120 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7121 7122 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7123 n_d += mm->nz; 7124 } 7125 } 7126 if (mmdata->hasoffproc) { /* offprocess insertion */ 7127 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7128 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7129 } 7130 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7131 PetscFunctionReturn(PETSC_SUCCESS); 7132 } 7133 7134 /* Support for Pt * A, A * P, or Pt * A * P */ 7135 #define MAX_NUMBER_INTERMEDIATE 4 7136 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7137 { 7138 Mat_Product *product = C->product; 7139 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7140 Mat_MPIAIJ *a, *p; 7141 MatMatMPIAIJBACKEND *mmdata; 7142 ISLocalToGlobalMapping P_oth_l2g = NULL; 7143 IS glob = NULL; 7144 const char *prefix; 7145 char pprefix[256]; 7146 const PetscInt *globidx, *P_oth_idx; 7147 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7148 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7149 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7150 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7151 /* a base offset; type-2: sparse with a local to global map table */ 7152 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7153 7154 MatProductType ptype; 7155 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7156 PetscMPIInt size; 7157 7158 PetscFunctionBegin; 7159 MatCheckProduct(C, 1); 7160 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7161 ptype = product->type; 7162 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7163 ptype = MATPRODUCT_AB; 7164 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7165 } 7166 switch (ptype) { 7167 case MATPRODUCT_AB: 7168 A = product->A; 7169 P = product->B; 7170 m = A->rmap->n; 7171 n = P->cmap->n; 7172 M = A->rmap->N; 7173 N = P->cmap->N; 7174 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7175 break; 7176 case MATPRODUCT_AtB: 7177 P = product->A; 7178 A = product->B; 7179 m = P->cmap->n; 7180 n = A->cmap->n; 7181 M = P->cmap->N; 7182 N = A->cmap->N; 7183 hasoffproc = PETSC_TRUE; 7184 break; 7185 case MATPRODUCT_PtAP: 7186 A = product->A; 7187 P = product->B; 7188 m = P->cmap->n; 7189 n = P->cmap->n; 7190 M = P->cmap->N; 7191 N = P->cmap->N; 7192 hasoffproc = PETSC_TRUE; 7193 break; 7194 default: 7195 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7196 } 7197 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7198 if (size == 1) hasoffproc = PETSC_FALSE; 7199 7200 /* defaults */ 7201 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7202 mp[i] = NULL; 7203 mptmp[i] = PETSC_FALSE; 7204 rmapt[i] = -1; 7205 cmapt[i] = -1; 7206 rmapa[i] = NULL; 7207 cmapa[i] = NULL; 7208 } 7209 7210 /* customization */ 7211 PetscCall(PetscNew(&mmdata)); 7212 mmdata->reusesym = product->api_user; 7213 if (ptype == MATPRODUCT_AB) { 7214 if (product->api_user) { 7215 
PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7216 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7217 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7218 PetscOptionsEnd(); 7219 } else { 7220 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7221 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7222 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7223 PetscOptionsEnd(); 7224 } 7225 } else if (ptype == MATPRODUCT_PtAP) { 7226 if (product->api_user) { 7227 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7228 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7229 PetscOptionsEnd(); 7230 } else { 7231 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7232 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7233 PetscOptionsEnd(); 7234 } 7235 } 7236 a = (Mat_MPIAIJ *)A->data; 7237 p = (Mat_MPIAIJ *)P->data; 7238 PetscCall(MatSetSizes(C, m, n, M, N)); 7239 PetscCall(PetscLayoutSetUp(C->rmap)); 7240 PetscCall(PetscLayoutSetUp(C->cmap)); 7241 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7242 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7243 7244 cp = 0; 7245 switch (ptype) { 7246 case MATPRODUCT_AB: /* A * P */ 7247 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7248 7249 /* A_diag * P_local (merged or not) */ 7250 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7251 /* P is product->B */ 7252 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7253 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7254 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7255 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7256 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7257 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7258 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7259 mp[cp]->product->api_user = product->api_user; 7260 PetscCall(MatProductSetFromOptions(mp[cp])); 7261 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7262 PetscCall(ISGetIndices(glob, &globidx)); 7263 rmapt[cp] = 1; 7264 cmapt[cp] = 2; 7265 cmapa[cp] = globidx; 7266 mptmp[cp] = PETSC_FALSE; 7267 cp++; 7268 } else { /* A_diag * P_diag and A_diag * P_off */ 7269 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7270 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7271 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7272 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7273 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7274 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7275 mp[cp]->product->api_user = 
product->api_user; 7276 PetscCall(MatProductSetFromOptions(mp[cp])); 7277 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7278 rmapt[cp] = 1; 7279 cmapt[cp] = 1; 7280 mptmp[cp] = PETSC_FALSE; 7281 cp++; 7282 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7283 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7284 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7285 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7286 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7287 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7288 mp[cp]->product->api_user = product->api_user; 7289 PetscCall(MatProductSetFromOptions(mp[cp])); 7290 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7291 rmapt[cp] = 1; 7292 cmapt[cp] = 2; 7293 cmapa[cp] = p->garray; 7294 mptmp[cp] = PETSC_FALSE; 7295 cp++; 7296 } 7297 7298 /* A_off * P_other */ 7299 if (mmdata->P_oth) { 7300 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7301 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7302 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7303 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7304 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7305 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7306 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7307 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7308 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7309 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7310 mp[cp]->product->api_user = product->api_user; 7311 PetscCall(MatProductSetFromOptions(mp[cp])); 7312 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7313 rmapt[cp] = 1; 7314 cmapt[cp] = 2; 7315 cmapa[cp] = P_oth_idx; 7316 mptmp[cp] = PETSC_FALSE; 7317 cp++; 7318 } 7319 break; 7320 7321 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7322 /* A is product->B */ 7323 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7324 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7325 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7326 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7327 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7328 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7329 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7330 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7331 mp[cp]->product->api_user = product->api_user; 7332 PetscCall(MatProductSetFromOptions(mp[cp])); 7333 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7334 PetscCall(ISGetIndices(glob, &globidx)); 7335 rmapt[cp] = 2; 7336 rmapa[cp] = globidx; 7337 cmapt[cp] = 2; 7338 cmapa[cp] = globidx; 7339 mptmp[cp] = PETSC_FALSE; 7340 cp++; 7341 } else { 7342 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7343 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7344 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7345 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7346 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7347 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7348 mp[cp]->product->api_user = product->api_user; 7349 PetscCall(MatProductSetFromOptions(mp[cp])); 7350 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7351 PetscCall(ISGetIndices(glob, &globidx)); 
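      /* rows of P_diag^T * A_loc are the locally owned columns of P, i.e. consecutive rows of C
         (type-1 row map); columns follow the merged local column numbering of A recorded in globidx
         (type-2 column map) */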
7352 rmapt[cp] = 1; 7353 cmapt[cp] = 2; 7354 cmapa[cp] = globidx; 7355 mptmp[cp] = PETSC_FALSE; 7356 cp++; 7357 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7358 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7359 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7360 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7361 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7362 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7363 mp[cp]->product->api_user = product->api_user; 7364 PetscCall(MatProductSetFromOptions(mp[cp])); 7365 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7366 rmapt[cp] = 2; 7367 rmapa[cp] = p->garray; 7368 cmapt[cp] = 2; 7369 cmapa[cp] = globidx; 7370 mptmp[cp] = PETSC_FALSE; 7371 cp++; 7372 } 7373 break; 7374 case MATPRODUCT_PtAP: 7375 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7376 /* P is product->B */ 7377 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7378 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7379 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7380 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7381 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7382 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7383 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7384 mp[cp]->product->api_user = product->api_user; 7385 PetscCall(MatProductSetFromOptions(mp[cp])); 7386 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7387 PetscCall(ISGetIndices(glob, &globidx)); 7388 rmapt[cp] = 2; 7389 rmapa[cp] = globidx; 7390 cmapt[cp] = 2; 7391 cmapa[cp] = globidx; 7392 mptmp[cp] = PETSC_FALSE; 7393 cp++; 7394 if (mmdata->P_oth) { 7395 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7396 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7397 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7398 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7399 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7400 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7401 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7402 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7403 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7404 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7405 mp[cp]->product->api_user = product->api_user; 7406 PetscCall(MatProductSetFromOptions(mp[cp])); 7407 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7408 mptmp[cp] = PETSC_TRUE; 7409 cp++; 7410 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7411 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7412 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7413 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7414 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7415 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7416 mp[cp]->product->api_user = product->api_user; 7417 PetscCall(MatProductSetFromOptions(mp[cp])); 7418 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7419 rmapt[cp] = 2; 7420 rmapa[cp] = globidx; 7421 cmapt[cp] = 2; 7422 cmapa[cp] = P_oth_idx; 7423 mptmp[cp] = PETSC_FALSE; 7424 cp++; 7425 } 7426 break; 7427 default: 7428 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", 
MatProductTypes[ptype]); 7429 } 7430 /* sanity check */ 7431 if (size > 1) 7432 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7433 7434 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7435 for (i = 0; i < cp; i++) { 7436 mmdata->mp[i] = mp[i]; 7437 mmdata->mptmp[i] = mptmp[i]; 7438 } 7439 mmdata->cp = cp; 7440 C->product->data = mmdata; 7441 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7442 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7443 7444 /* memory type */ 7445 mmdata->mtype = PETSC_MEMTYPE_HOST; 7446 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7447 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7448 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7449 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7450 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7451 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7452 7453 /* prepare coo coordinates for values insertion */ 7454 7455 /* count total nonzeros of those intermediate seqaij Mats 7456 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7457 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7458 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7459 */ 7460 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7461 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7462 if (mptmp[cp]) continue; 7463 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7464 const PetscInt *rmap = rmapa[cp]; 7465 const PetscInt mr = mp[cp]->rmap->n; 7466 const PetscInt rs = C->rmap->rstart; 7467 const PetscInt re = C->rmap->rend; 7468 const PetscInt *ii = mm->i; 7469 for (i = 0; i < mr; i++) { 7470 const PetscInt gr = rmap[i]; 7471 const PetscInt nz = ii[i + 1] - ii[i]; 7472 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7473 else ncoo_oown += nz; /* this row is local */ 7474 } 7475 } else ncoo_d += mm->nz; 7476 } 7477 7478 /* 7479 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7480 7481 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7482 7483 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7484 7485 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7486 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7487 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7488 7489 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7490 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 
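     Concretely, the local (beginning) part of coo_i/j (and later coo_v) is laid out as one contiguous
     segment per kept (non-temporary) matrix mp[p], in increasing p: the whole of mp[p] when its rows map
     consecutively into C, and only its locally owned rows when its row map is sparse. The tail of size
     ncoo2 holds the entries received from other processes. Likewise, coo_w holds one send segment of size
     off[p+1]-off[p] per matrix mp[p] (empty for matrices that send nothing).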
7491 */ 7492 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7493 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7494 7495 /* gather (i,j) of nonzeros inserted by remote procs */ 7496 if (hasoffproc) { 7497 PetscSF msf; 7498 PetscInt ncoo2, *coo_i2, *coo_j2; 7499 7500 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7501 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7502 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7503 7504 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7505 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7506 PetscInt *idxoff = mmdata->off[cp]; 7507 PetscInt *idxown = mmdata->own[cp]; 7508 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7509 const PetscInt *rmap = rmapa[cp]; 7510 const PetscInt *cmap = cmapa[cp]; 7511 const PetscInt *ii = mm->i; 7512 PetscInt *coi = coo_i + ncoo_o; 7513 PetscInt *coj = coo_j + ncoo_o; 7514 const PetscInt mr = mp[cp]->rmap->n; 7515 const PetscInt rs = C->rmap->rstart; 7516 const PetscInt re = C->rmap->rend; 7517 const PetscInt cs = C->cmap->rstart; 7518 for (i = 0; i < mr; i++) { 7519 const PetscInt *jj = mm->j + ii[i]; 7520 const PetscInt gr = rmap[i]; 7521 const PetscInt nz = ii[i + 1] - ii[i]; 7522 if (gr < rs || gr >= re) { /* this is an offproc row */ 7523 for (j = ii[i]; j < ii[i + 1]; j++) { 7524 *coi++ = gr; 7525 *idxoff++ = j; 7526 } 7527 if (!cmapt[cp]) { /* already global */ 7528 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7529 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7530 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7531 } else { /* offdiag */ 7532 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7533 } 7534 ncoo_o += nz; 7535 } else { /* this is a local row */ 7536 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7537 } 7538 } 7539 } 7540 mmdata->off[cp + 1] = idxoff; 7541 mmdata->own[cp + 1] = idxown; 7542 } 7543 7544 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7545 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7546 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7547 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7548 ncoo = ncoo_d + ncoo_oown + ncoo2; 7549 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7550 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7551 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7552 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7553 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7554 PetscCall(PetscFree2(coo_i, coo_j)); 7555 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7556 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7557 coo_i = coo_i2; 7558 coo_j = coo_j2; 7559 } else { /* no offproc values insertion */ 7560 ncoo = ncoo_d; 7561 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7562 7563 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7564 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7565 PetscCall(PetscSFSetUp(mmdata->sf)); 7566 } 7567 mmdata->hasoffproc = hasoffproc; 7568 7569 /* gather (i,j) of nonzeros 
inserted locally */ 7570 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7571 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7572 PetscInt *coi = coo_i + ncoo_d; 7573 PetscInt *coj = coo_j + ncoo_d; 7574 const PetscInt *jj = mm->j; 7575 const PetscInt *ii = mm->i; 7576 const PetscInt *cmap = cmapa[cp]; 7577 const PetscInt *rmap = rmapa[cp]; 7578 const PetscInt mr = mp[cp]->rmap->n; 7579 const PetscInt rs = C->rmap->rstart; 7580 const PetscInt re = C->rmap->rend; 7581 const PetscInt cs = C->cmap->rstart; 7582 7583 if (mptmp[cp]) continue; 7584 if (rmapt[cp] == 1) { /* consecutive rows */ 7585 /* fill coo_i */ 7586 for (i = 0; i < mr; i++) { 7587 const PetscInt gr = i + rs; 7588 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7589 } 7590 /* fill coo_j */ 7591 if (!cmapt[cp]) { /* type-0, already global */ 7592 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7593 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7594 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7595 } else { /* type-2, local to global for sparse columns */ 7596 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7597 } 7598 ncoo_d += mm->nz; 7599 } else if (rmapt[cp] == 2) { /* sparse rows */ 7600 for (i = 0; i < mr; i++) { 7601 const PetscInt *jj = mm->j + ii[i]; 7602 const PetscInt gr = rmap[i]; 7603 const PetscInt nz = ii[i + 1] - ii[i]; 7604 if (gr >= rs && gr < re) { /* local rows */ 7605 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7606 if (!cmapt[cp]) { /* type-0, already global */ 7607 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7608 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7609 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7610 } else { /* type-2, local to global for sparse columns */ 7611 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7612 } 7613 ncoo_d += nz; 7614 } 7615 } 7616 } 7617 } 7618 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7619 PetscCall(ISDestroy(&glob)); 7620 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7621 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7622 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7623 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7624 7625 /* preallocate with COO data */ 7626 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7627 PetscCall(PetscFree2(coo_i, coo_j)); 7628 PetscFunctionReturn(PETSC_SUCCESS); 7629 } 7630 7631 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7632 { 7633 Mat_Product *product = mat->product; 7634 #if defined(PETSC_HAVE_DEVICE) 7635 PetscBool match = PETSC_FALSE; 7636 PetscBool usecpu = PETSC_FALSE; 7637 #else 7638 PetscBool match = PETSC_TRUE; 7639 #endif 7640 7641 PetscFunctionBegin; 7642 MatCheckProduct(mat, 1); 7643 #if defined(PETSC_HAVE_DEVICE) 7644 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7645 if (match) { /* we can always fallback to the CPU if requested */ 7646 switch (product->type) { 7647 case MATPRODUCT_AB: 7648 if (product->api_user) { 7649 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7650 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7651 PetscOptionsEnd(); 7652 } else { 7653 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7654 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7655 PetscOptionsEnd(); 7656 } 7657 break; 7658 case MATPRODUCT_AtB: 7659 if (product->api_user) { 7660 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7661 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7662 PetscOptionsEnd(); 7663 } else { 7664 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7665 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7666 PetscOptionsEnd(); 7667 } 7668 break; 7669 case MATPRODUCT_PtAP: 7670 if (product->api_user) { 7671 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7672 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7673 PetscOptionsEnd(); 7674 } else { 7675 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7676 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7677 PetscOptionsEnd(); 7678 } 7679 break; 7680 default: 7681 break; 7682 } 7683 match = (PetscBool)!usecpu; 7684 } 7685 #endif 7686 if (match) { 7687 switch (product->type) { 7688 case MATPRODUCT_AB: 7689 case MATPRODUCT_AtB: 7690 case MATPRODUCT_PtAP: 7691 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7692 break; 7693 default: 7694 break; 7695 } 7696 } 7697 /* fallback to MPIAIJ ops */ 7698 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7699 PetscFunctionReturn(PETSC_SUCCESS); 7700 } 7701 7702 /* 7703 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7704 7705 n - the number of block indices in cc[] 7706 cc - the block indices (must be large enough to contain the indices) 7707 */ 7708 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7709 { 7710 PetscInt cnt = -1, nidx, j; 7711 const PetscInt *idx; 7712 7713 PetscFunctionBegin; 7714 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7715 if (nidx) { 7716 cnt = 0; 7717 cc[cnt] = idx[0] / bs; 7718 for (j = 1; j < nidx; j++) { 7719 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7720 } 7721 } 7722 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7723 *n = cnt + 1; 7724 PetscFunctionReturn(PETSC_SUCCESS); 7725 } 7726 7727 /* 7728 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7729 7730 ncollapsed - the number of block indices 7731 collapsed - the block indices (must be large enough to contain the indices) 7732 */ 7733 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7734 { 7735 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7736 7737 PetscFunctionBegin; 7738 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7739 for (i = start + 1; i < start + bs; i++) { 7740 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 
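    /* merge the block columns of row i into the running set, then swap cprev and merged so that
       cprev always holds the accumulated result for the next iteration */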
7741 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7742 cprevtmp = cprev; 7743 cprev = merged; 7744 merged = cprevtmp; 7745 } 7746 *ncollapsed = nprev; 7747 if (collapsed) *collapsed = cprev; 7748 PetscFunctionReturn(PETSC_SUCCESS); 7749 } 7750 7751 /* 7752 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7753 7754 Input Parameter: 7755 . Amat - matrix 7756 - symmetrize - make the result symmetric 7757 + scale - scale with diagonal 7758 7759 Output Parameter: 7760 . a_Gmat - output scalar graph >= 0 7761 7762 */ 7763 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7764 { 7765 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7766 MPI_Comm comm; 7767 Mat Gmat; 7768 PetscBool ismpiaij, isseqaij; 7769 Mat a, b, c; 7770 MatType jtype; 7771 7772 PetscFunctionBegin; 7773 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7774 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7775 PetscCall(MatGetSize(Amat, &MM, &NN)); 7776 PetscCall(MatGetBlockSize(Amat, &bs)); 7777 nloc = (Iend - Istart) / bs; 7778 7779 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7780 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7781 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7782 7783 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7784 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7785 implementation */ 7786 if (bs > 1) { 7787 PetscCall(MatGetType(Amat, &jtype)); 7788 PetscCall(MatCreate(comm, &Gmat)); 7789 PetscCall(MatSetType(Gmat, jtype)); 7790 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7791 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7792 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7793 PetscInt *d_nnz, *o_nnz; 7794 MatScalar *aa, val, *AA; 7795 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7796 if (isseqaij) { 7797 a = Amat; 7798 b = NULL; 7799 } else { 7800 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7801 a = d->A; 7802 b = d->B; 7803 } 7804 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7805 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7806 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7807 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7808 const PetscInt *cols1, *cols2; 7809 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7810 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7811 nnz[brow / bs] = nc2 / bs; 7812 if (nc2 % bs) ok = 0; 7813 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7814 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7815 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7816 if (nc1 != nc2) ok = 0; 7817 else { 7818 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7819 if (cols1[jj] != cols2[jj]) ok = 0; 7820 if (cols1[jj] % bs != jj % bs) ok = 0; 7821 } 7822 } 7823 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7824 } 7825 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7826 if (!ok) { 7827 PetscCall(PetscFree2(d_nnz, o_nnz)); 7828 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7829 goto old_bs; 7830 } 7831 } 7832 } 7833 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7834 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7835 PetscCall(PetscFree2(d_nnz, o_nnz)); 7836 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7837 // diag 7838 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7839 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7840 ai = aseq->i; 7841 n = ai[brow + 1] - ai[brow]; 7842 aj = aseq->j + ai[brow]; 7843 for (int k = 0; k < n; k += bs) { // block columns 7844 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7845 val = 0; 7846 if (index_size == 0) { 7847 for (int ii = 0; ii < bs; ii++) { // rows in block 7848 aa = aseq->a + ai[brow + ii] + k; 7849 for (int jj = 0; jj < bs; jj++) { // columns in block 7850 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7851 } 7852 } 7853 } else { // use (index,index) value if provided 7854 for (int iii = 0; iii < index_size; iii++) { // rows in block 7855 int ii = index[iii]; 7856 aa = aseq->a + ai[brow + ii] + k; 7857 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7858 int jj = index[jjj]; 7859 val += PetscAbs(PetscRealPart(aa[jj])); 7860 } 7861 } 7862 } 7863 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7864 AA[k / bs] = val; 7865 } 7866 grow = Istart / bs + brow / bs; 7867 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7868 } 7869 // off-diag 7870 if (ismpiaij) { 7871 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7872 const PetscScalar *vals; 7873 const PetscInt *cols, *garray = aij->garray; 7874 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7875 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7876 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7877 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7878 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7879 AA[k / bs] = 0; 7880 AJ[cidx] = garray[cols[k]] / bs; 7881 } 7882 nc = ncols / bs; 7883 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7884 if (index_size == 0) { 7885 for (int ii = 0; ii < bs; ii++) { // rows in block 7886 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7887 for (int k = 0; k < ncols; k += bs) { 7888 for (int jj = 0; jj < bs; jj++) { // cols in block 7889 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7890 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7891 } 7892 } 7893 PetscCall(MatRestoreRow(b, brow + ii, &ncols, 
&cols, &vals)); 7894 } 7895 } else { // use (index,index) value if provided 7896 for (int iii = 0; iii < index_size; iii++) { // rows in block 7897 int ii = index[iii]; 7898 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7899 for (int k = 0; k < ncols; k += bs) { 7900 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block 7901 int jj = index[jjj]; 7902 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7903 } 7904 } 7905 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7906 } 7907 } 7908 grow = Istart / bs + brow / bs; 7909 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7910 } 7911 } 7912 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7913 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7914 PetscCall(PetscFree2(AA, AJ)); 7915 } else { 7916 const PetscScalar *vals; 7917 const PetscInt *idx; 7918 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7919 old_bs: 7920 /* 7921 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7922 */ 7923 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7924 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7925 if (isseqaij) { 7926 PetscInt max_d_nnz; 7927 /* 7928 Determine exact preallocation count for (sequential) scalar matrix 7929 */ 7930 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7931 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7932 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7933 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7934 PetscCall(PetscFree3(w0, w1, w2)); 7935 } else if (ismpiaij) { 7936 Mat Daij, Oaij; 7937 const PetscInt *garray; 7938 PetscInt max_d_nnz; 7939 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7940 /* 7941 Determine exact preallocation count for diagonal block portion of scalar matrix 7942 */ 7943 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7944 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7945 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7946 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7947 PetscCall(PetscFree3(w0, w1, w2)); 7948 /* 7949 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7950 */ 7951 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7952 o_nnz[jj] = 0; 7953 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7954 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7955 o_nnz[jj] += ncols; 7956 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7957 } 7958 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7959 } 7960 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7961 /* get scalar copy (norms) of matrix */ 7962 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7963 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7964 PetscCall(PetscFree2(d_nnz, o_nnz)); 7965 for (Ii = Istart; Ii < Iend; Ii++) { 7966 PetscInt dest_row = Ii / bs; 7967 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7968 for (jj = 0; jj < ncols; jj++) { 7969 PetscInt dest_col = idx[jj] / bs; 7970 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7971 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7972 } 7973 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7974 } 7975 PetscCall(MatAssemblyBegin(Gmat, 
MAT_FINAL_ASSEMBLY)); 7976 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7977 } 7978 } else { 7979 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7980 else { 7981 Gmat = Amat; 7982 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7983 } 7984 if (isseqaij) { 7985 a = Gmat; 7986 b = NULL; 7987 } else { 7988 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7989 a = d->A; 7990 b = d->B; 7991 } 7992 if (filter >= 0 || scale) { 7993 /* take absolute value of each entry */ 7994 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7995 MatInfo info; 7996 PetscScalar *avals; 7997 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7998 PetscCall(MatSeqAIJGetArray(c, &avals)); 7999 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8000 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8001 } 8002 } 8003 } 8004 if (symmetrize) { 8005 PetscBool isset, issym; 8006 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8007 if (!isset || !issym) { 8008 Mat matTrans; 8009 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8010 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8011 PetscCall(MatDestroy(&matTrans)); 8012 } 8013 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8014 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8015 if (scale) { 8016 /* scale c for all diagonal values = 1 or -1 */ 8017 Vec diag; 8018 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8019 PetscCall(MatGetDiagonal(Gmat, diag)); 8020 PetscCall(VecReciprocal(diag)); 8021 PetscCall(VecSqrtAbs(diag)); 8022 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8023 PetscCall(VecDestroy(&diag)); 8024 } 8025 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8026 8027 if (filter >= 0) { 8028 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8029 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8030 } 8031 *a_Gmat = Gmat; 8032 PetscFunctionReturn(PETSC_SUCCESS); 8033 } 8034 8035 /* 8036 Special version for direct calls from Fortran 8037 */ 8038 8039 /* Change these macros so can be used in void function */ 8040 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8041 #undef PetscCall 8042 #define PetscCall(...) \ 8043 do { \ 8044 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8045 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8046 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8047 return; \ 8048 } \ 8049 } while (0) 8050 8051 #undef SETERRQ 8052 #define SETERRQ(comm, ierr, ...) 
\ 8053 do { \ 8054 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8055 return; \ 8056 } while (0) 8057 8058 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8059 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8060 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8061 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8062 #else 8063 #endif 8064 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8065 { 8066 Mat mat = *mmat; 8067 PetscInt m = *mm, n = *mn; 8068 InsertMode addv = *maddv; 8069 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8070 PetscScalar value; 8071 8072 MatCheckPreallocated(mat, 1); 8073 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8074 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8075 { 8076 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8077 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8078 PetscBool roworiented = aij->roworiented; 8079 8080 /* Some Variables required in the macro */ 8081 Mat A = aij->A; 8082 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8083 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8084 MatScalar *aa; 8085 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8086 Mat B = aij->B; 8087 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8088 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8089 MatScalar *ba; 8090 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8091 * cannot use "#if defined" inside a macro. 
*/ 8092 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8093 8094 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8095 PetscInt nonew = a->nonew; 8096 MatScalar *ap1, *ap2; 8097 8098 PetscFunctionBegin; 8099 PetscCall(MatSeqAIJGetArray(A, &aa)); 8100 PetscCall(MatSeqAIJGetArray(B, &ba)); 8101 for (i = 0; i < m; i++) { 8102 if (im[i] < 0) continue; 8103 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8104 if (im[i] >= rstart && im[i] < rend) { 8105 row = im[i] - rstart; 8106 lastcol1 = -1; 8107 rp1 = aj + ai[row]; 8108 ap1 = aa + ai[row]; 8109 rmax1 = aimax[row]; 8110 nrow1 = ailen[row]; 8111 low1 = 0; 8112 high1 = nrow1; 8113 lastcol2 = -1; 8114 rp2 = bj + bi[row]; 8115 ap2 = ba + bi[row]; 8116 rmax2 = bimax[row]; 8117 nrow2 = bilen[row]; 8118 low2 = 0; 8119 high2 = nrow2; 8120 8121 for (j = 0; j < n; j++) { 8122 if (roworiented) value = v[i * n + j]; 8123 else value = v[i + j * m]; 8124 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8125 if (in[j] >= cstart && in[j] < cend) { 8126 col = in[j] - cstart; 8127 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8128 } else if (in[j] < 0) continue; 8129 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8130 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8131 } else { 8132 if (mat->was_assembled) { 8133 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8134 #if defined(PETSC_USE_CTABLE) 8135 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8136 col--; 8137 #else 8138 col = aij->colmap[in[j]] - 1; 8139 #endif 8140 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8141 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8142 col = in[j]; 8143 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8144 B = aij->B; 8145 b = (Mat_SeqAIJ *)B->data; 8146 bimax = b->imax; 8147 bi = b->i; 8148 bilen = b->ilen; 8149 bj = b->j; 8150 rp2 = bj + bi[row]; 8151 ap2 = ba + bi[row]; 8152 rmax2 = bimax[row]; 8153 nrow2 = bilen[row]; 8154 low2 = 0; 8155 high2 = nrow2; 8156 bm = aij->B->rmap->n; 8157 ba = b->a; 8158 inserted = PETSC_FALSE; 8159 } 8160 } else col = in[j]; 8161 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8162 } 8163 } 8164 } else if (!aij->donotstash) { 8165 if (roworiented) { 8166 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8167 } else { 8168 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8169 } 8170 } 8171 } 8172 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8173 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8174 } 8175 PetscFunctionReturnVoid(); 8176 } 8177 8178 /* Undefining these here since they were redefined from their original definition above! No 8179 * other PETSc functions should be defined past this point, as it is impossible to recover the 8180 * original definitions */ 8181 #undef PetscCall 8182 #undef SETERRQ 8183
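
/*
   Illustrative usage sketch (user-side code, shown here only as a comment) for
   MatCreateMPIAIJWithSplitArrays() defined above; the sizes and index arrays are made up for the
   example. Each process contributes one row with a single diagonal entry and no off-diagonal
   entries; the caller retains ownership of all six arrays and must keep them valid until the
   matrix is destroyed.

     PetscInt    i[]  = {0, 1};   // diagonal block CSR row pointers: one local row, one entry
     PetscInt    j[]  = {0};      // local column index of the diagonal entry
     PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0, 0};   // off-diagonal block CSR row pointers: one local row, no entries
     PetscInt    oj[] = {0};      // unused here; would hold global column indices otherwise
     PetscScalar oa[] = {0.0};
     Mat         A;

     PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 1, 1, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &A));
     PetscCall(MatDestroy(&A));
*/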