#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`;
   the type also automatically switches over to using inodes when enough exist.

.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/
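/*
   Illustrative sketch (not part of the original source): creating an `MATAIJ` matrix and calling both
   preallocation routines, as the `MATAIJ` manual page above recommends. The global size (100 x 100) and
   the per-row nonzero estimates (5 diagonal, 2 off-diagonal) are made-up placeholder values.

     Mat A;

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
     PetscCall(MatSetType(A, MATAIJ));
     PetscCall(MatSetFromOptions(A));
     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          -- used when the communicator has one process
     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); -- used when it has more than one
     -- ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as usual, then MatDestroy(&A) ...
*/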
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](chapter_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
  PetscInt i, m, n, *garray = aij->garray;
  Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
type"); 317 if (type == NORM_INFINITY) { 318 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 319 } else { 320 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 321 } 322 PetscCall(PetscFree(work)); 323 if (type == NORM_2) { 324 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 325 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 326 for (i = 0; i < n; i++) reductions[i] /= m; 327 } 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 332 { 333 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 334 IS sis, gis; 335 const PetscInt *isis, *igis; 336 PetscInt n, *iis, nsis, ngis, rstart, i; 337 338 PetscFunctionBegin; 339 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 340 PetscCall(MatFindNonzeroRows(a->B, &gis)); 341 PetscCall(ISGetSize(gis, &ngis)); 342 PetscCall(ISGetSize(sis, &nsis)); 343 PetscCall(ISGetIndices(sis, &isis)); 344 PetscCall(ISGetIndices(gis, &igis)); 345 346 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 347 PetscCall(PetscArraycpy(iis, igis, ngis)); 348 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 349 n = ngis + nsis; 350 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 351 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 352 for (i = 0; i < n; i++) iis[i] += rstart; 353 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 354 355 PetscCall(ISRestoreIndices(sis, &isis)); 356 PetscCall(ISRestoreIndices(gis, &igis)); 357 PetscCall(ISDestroy(&sis)); 358 PetscCall(ISDestroy(&gis)); 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 /* 363 Local utility routine that creates a mapping from the global column 364 number to the local number in the off-diagonal part of the local 365 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 366 a slightly higher hash table cost; without it it is not scalable (each processor 367 has an order N integer array but is fast to access. 
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool ignorezeroentries = a->ignorezeroentries;
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar *aa, *ba;
  PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt nonew;
  MatScalar *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      lastcol1 = -1;
      rp1 = aj + ai[row];
      ap1 = aa + ai[row];
      rmax1 = aimax[row];
      nrow1 = ailen[row];
      low1 = 0;
      high1 = nrow1;
      lastcol2 = -1;
      rp2 = bj + bi[row];
      ap2 = ba + bi[row];
      rmax2 = bimax[row];
      nrow2 = bilen[row];
      low2 = 0;
      high2 = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi = b->i;
              bilen = b->ilen;
              bj = b->j;
              ba = b->a;
              rp2 = bj + bi[row];
              ap2 = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2 = 0;
              high2 = nrow2;
              bm = aij->B->rmap->n;
              ba = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt diag_so_far = 0, dnz;
  PetscInt offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt n;
  PetscInt i, j, rstart, ncols, flg;
  PetscInt *row, *col;
  PetscBool other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt *lrows;
  PetscInt r, len;
  PetscBool cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt nnwA, nnwB;
    PetscBool nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt n = A->rmap->n;
  PetscInt i, j, r, m, len = 0;
  PetscInt *lrows, *owners = A->rmap->range;
  PetscMPIInt p = 0;
  PetscSFNode *rrows;
  PetscSF sf;
  const PetscScalar *xx;
  PetscScalar *bb, *mask, *aij_a;
  Vec xmask, lmask;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt *aj, *ii, *ridx;
  PetscScalar *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
[0,%" PetscInt_FMT ")", idx, A->rmap->N); 948 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 949 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 950 } 951 rrows[r].rank = p; 952 rrows[r].index = rows[r] - owners[p]; 953 } 954 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 955 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 956 /* Collect flags for rows to be zeroed */ 957 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 958 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 959 PetscCall(PetscSFDestroy(&sf)); 960 /* Compress and put in row numbers */ 961 for (r = 0; r < n; ++r) 962 if (lrows[r] >= 0) lrows[len++] = r; 963 /* zero diagonal part of matrix */ 964 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 965 /* handle off diagonal part of matrix */ 966 PetscCall(MatCreateVecs(A, &xmask, NULL)); 967 PetscCall(VecDuplicate(l->lvec, &lmask)); 968 PetscCall(VecGetArray(xmask, &bb)); 969 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 970 PetscCall(VecRestoreArray(xmask, &bb)); 971 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 972 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 973 PetscCall(VecDestroy(&xmask)); 974 if (x && b) { /* this code is buggy when the row and column layout don't match */ 975 PetscBool cong; 976 977 PetscCall(MatHasCongruentLayouts(A, &cong)); 978 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 979 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 980 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 981 PetscCall(VecGetArrayRead(l->lvec, &xx)); 982 PetscCall(VecGetArray(b, &bb)); 983 } 984 PetscCall(VecGetArray(lmask, &mask)); 985 /* remove zeroed rows of off diagonal matrix */ 986 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 987 ii = aij->i; 988 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]])); 989 /* loop over all elements of off process part of matrix zeroing removed columns*/ 990 if (aij->compressedrow.use) { 991 m = aij->compressedrow.nrows; 992 ii = aij->compressedrow.i; 993 ridx = aij->compressedrow.rindex; 994 for (i = 0; i < m; i++) { 995 n = ii[i + 1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij_a + ii[i]; 998 999 for (j = 0; j < n; j++) { 1000 if (PetscAbsScalar(mask[*aj])) { 1001 if (b) bb[*ridx] -= *aa * xx[*aj]; 1002 *aa = 0.0; 1003 } 1004 aa++; 1005 aj++; 1006 } 1007 ridx++; 1008 } 1009 } else { /* do not use compressed row format */ 1010 m = l->B->rmap->n; 1011 for (i = 0; i < m; i++) { 1012 n = ii[i + 1] - ii[i]; 1013 aj = aij->j + ii[i]; 1014 aa = aij_a + ii[i]; 1015 for (j = 0; j < n; j++) { 1016 if (PetscAbsScalar(mask[*aj])) { 1017 if (b) bb[i] -= *aa * xx[*aj]; 1018 *aa = 0.0; 1019 } 1020 aa++; 1021 aj++; 1022 } 1023 } 1024 } 1025 if (x && b) { 1026 PetscCall(VecRestoreArray(b, &bb)); 1027 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1028 } 1029 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1030 PetscCall(VecRestoreArray(lmask, &mask)); 1031 PetscCall(VecDestroy(&lmask)); 1032 PetscCall(PetscFree(lrows)); 1033 1034 /* only change matrix nonzero state if pattern was allowed to be changed */ 1035 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 1036 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS Me, Notme;
  PetscInt M, N, first, last, *notme, i;
  PetscBool lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
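/*
   Usage sketch (assumed typical caller-side usage, not taken from this file): the buffers freed above are
   created by MatSetPreallocationCOO() and consumed by MatSetValuesCOO(). A caller hands in the full list of
   (i,j) locations once and can then fill the values repeatedly; ncoo, coo_i, coo_j and v are placeholders.

     PetscCall(MatSetPreallocationCOO(A, ncoo, coo_i, coo_j));  -- builds the maps/buffers freed by MatResetPreallocationCOO_MPIAIJ()
     PetscCall(MatSetValuesCOO(A, v, INSERT_VALUES));           -- or ADD_VALUES; may be called many times
*/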
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
  PetscInt *rowlens;
  PetscInt *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M = mat->rmap->N;
  N = mat->cmap->N;
  m = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt rank = aij->rank, size = aij->size;
  PetscBool isdraw, iascii, isbinary;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A  = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec bb1 = NULL;
  PetscBool hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1434 if (flag & SOR_ZERO_INITIAL_GUESS) { 1435 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1436 its--; 1437 } 1438 1439 while (its--) { 1440 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1441 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1442 1443 /* update rhs: bb1 = bb - B*x */ 1444 PetscCall(VecScale(mat->lvec, -1.0)); 1445 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1446 1447 /* local sweep */ 1448 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1449 } 1450 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1451 if (flag & SOR_ZERO_INITIAL_GUESS) { 1452 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1453 its--; 1454 } 1455 while (its--) { 1456 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1457 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1458 1459 /* update rhs: bb1 = bb - B*x */ 1460 PetscCall(VecScale(mat->lvec, -1.0)); 1461 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1462 1463 /* local sweep */ 1464 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1465 } 1466 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1467 if (flag & SOR_ZERO_INITIAL_GUESS) { 1468 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1469 its--; 1470 } 1471 while (its--) { 1472 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1473 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1474 1475 /* update rhs: bb1 = bb - B*x */ 1476 PetscCall(VecScale(mat->lvec, -1.0)); 1477 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1478 1479 /* local sweep */ 1480 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1481 } 1482 } else if (flag & SOR_EISENSTAT) { 1483 Vec xx1; 1484 1485 PetscCall(VecDuplicate(bb, &xx1)); 1486 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1487 1488 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1489 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1490 if (!mat->diag) { 1491 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1492 PetscCall(MatGetDiagonal(matin, mat->diag)); 1493 } 1494 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1495 if (hasop) { 1496 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1497 } else { 1498 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1499 } 1500 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1501 1502 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1503 1504 /* local sweep */ 1505 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1506 PetscCall(VecAXPY(xx, 1.0, xx1)); 1507 PetscCall(VecDestroy(&xx1)); 1508 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1509 1510 PetscCall(VecDestroy(&bb1)); 1511 1512 matin->factorerrortype = mat->A->factorerrortype; 1513 PetscFunctionReturn(PETSC_SUCCESS); 1514 } 1515 1516 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 
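/* Builds the row/column-permuted copy of A: star forests invert the permutations to locate the
   destination of each local row, local column, and ghost column; the result is preallocated from
   the counts gathered below and the values are reinserted with MatSetValues(). */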
1517 { 1518 Mat aA, aB, Aperm; 1519 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1520 PetscScalar *aa, *ba; 1521 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1522 PetscSF rowsf, sf; 1523 IS parcolp = NULL; 1524 PetscBool done; 1525 1526 PetscFunctionBegin; 1527 PetscCall(MatGetLocalSize(A, &m, &n)); 1528 PetscCall(ISGetIndices(rowp, &rwant)); 1529 PetscCall(ISGetIndices(colp, &cwant)); 1530 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1531 1532 /* Invert row permutation to find out where my rows should go */ 1533 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1534 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1535 PetscCall(PetscSFSetFromOptions(rowsf)); 1536 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1537 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1538 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1539 1540 /* Invert column permutation to find out where my columns should go */ 1541 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1542 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1543 PetscCall(PetscSFSetFromOptions(sf)); 1544 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1545 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1546 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1547 PetscCall(PetscSFDestroy(&sf)); 1548 1549 PetscCall(ISRestoreIndices(rowp, &rwant)); 1550 PetscCall(ISRestoreIndices(colp, &cwant)); 1551 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1552 1553 /* Find out where my gcols should go */ 1554 PetscCall(MatGetSize(aB, NULL, &ng)); 1555 PetscCall(PetscMalloc1(ng, &gcdest)); 1556 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1557 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1558 PetscCall(PetscSFSetFromOptions(sf)); 1559 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1560 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1561 PetscCall(PetscSFDestroy(&sf)); 1562 1563 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1564 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1565 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1566 for (i = 0; i < m; i++) { 1567 PetscInt row = rdest[i]; 1568 PetscMPIInt rowner; 1569 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1570 for (j = ai[i]; j < ai[i + 1]; j++) { 1571 PetscInt col = cdest[aj[j]]; 1572 PetscMPIInt cowner; 1573 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1574 if (rowner == cowner) dnnz[i]++; 1575 else onnz[i]++; 1576 } 1577 for (j = bi[i]; j < bi[i + 1]; j++) { 1578 PetscInt col = gcdest[bj[j]]; 1579 PetscMPIInt cowner; 1580 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1581 if (rowner == cowner) dnnz[i]++; 1582 else onnz[i]++; 1583 } 1584 } 1585 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1586 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1587 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1588 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1589 PetscCall(PetscSFDestroy(&rowsf)); 1590 1591 
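/* Create the permuted matrix with exact preallocation: after the broadcasts above, tdnnz[] and
     tonnz[] hold the diagonal- and off-diagonal-block nonzero counts for each local row of the
     result, so no further mallocs should be needed during the MatSetValues() insertion below. */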
PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1592 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1593 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1594 for (i = 0; i < m; i++) { 1595 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1596 PetscInt j0, rowlen; 1597 rowlen = ai[i + 1] - ai[i]; 1598 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1599 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1600 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1601 } 1602 rowlen = bi[i + 1] - bi[i]; 1603 for (j0 = j = 0; j < rowlen; j0 = j) { 1604 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1605 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1606 } 1607 } 1608 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1609 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1610 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1611 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1612 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1613 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1614 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1615 PetscCall(PetscFree3(work, rdest, cdest)); 1616 PetscCall(PetscFree(gcdest)); 1617 if (parcolp) PetscCall(ISDestroy(&colp)); 1618 *B = Aperm; 1619 PetscFunctionReturn(PETSC_SUCCESS); 1620 } 1621 1622 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1623 { 1624 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1625 1626 PetscFunctionBegin; 1627 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1628 if (ghosts) *ghosts = aij->garray; 1629 PetscFunctionReturn(PETSC_SUCCESS); 1630 } 1631 1632 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1633 { 1634 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1635 Mat A = mat->A, B = mat->B; 1636 PetscLogDouble isend[5], irecv[5]; 1637 1638 PetscFunctionBegin; 1639 info->block_size = 1.0; 1640 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1641 1642 isend[0] = info->nz_used; 1643 isend[1] = info->nz_allocated; 1644 isend[2] = info->nz_unneeded; 1645 isend[3] = info->memory; 1646 isend[4] = info->mallocs; 1647 1648 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1649 1650 isend[0] += info->nz_used; 1651 isend[1] += info->nz_allocated; 1652 isend[2] += info->nz_unneeded; 1653 isend[3] += info->memory; 1654 isend[4] += info->mallocs; 1655 if (flag == MAT_LOCAL) { 1656 info->nz_used = isend[0]; 1657 info->nz_allocated = isend[1]; 1658 info->nz_unneeded = isend[2]; 1659 info->memory = isend[3]; 1660 info->mallocs = isend[4]; 1661 } else if (flag == MAT_GLOBAL_MAX) { 1662 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1663 1664 info->nz_used = irecv[0]; 1665 info->nz_allocated = irecv[1]; 1666 info->nz_unneeded = irecv[2]; 1667 info->memory = irecv[3]; 1668 info->mallocs = irecv[4]; 1669 } else if (flag == MAT_GLOBAL_SUM) { 1670 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1671 1672 info->nz_used = irecv[0]; 1673 info->nz_allocated = irecv[1]; 1674 info->nz_unneeded = irecv[2]; 1675 info->memory = irecv[3]; 1676 info->mallocs = irecv[4]; 1677 } 1678 info->fill_ratio_given = 0; /* no parallel 
LU/ILU/Cholesky */ 1679 info->fill_ratio_needed = 0; 1680 info->factor_mallocs = 0; 1681 PetscFunctionReturn(PETSC_SUCCESS); 1682 } 1683 1684 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1685 { 1686 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1687 1688 PetscFunctionBegin; 1689 switch (op) { 1690 case MAT_NEW_NONZERO_LOCATIONS: 1691 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1692 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1693 case MAT_KEEP_NONZERO_PATTERN: 1694 case MAT_NEW_NONZERO_LOCATION_ERR: 1695 case MAT_USE_INODES: 1696 case MAT_IGNORE_ZERO_ENTRIES: 1697 case MAT_FORM_EXPLICIT_TRANSPOSE: 1698 MatCheckPreallocated(A, 1); 1699 PetscCall(MatSetOption(a->A, op, flg)); 1700 PetscCall(MatSetOption(a->B, op, flg)); 1701 break; 1702 case MAT_ROW_ORIENTED: 1703 MatCheckPreallocated(A, 1); 1704 a->roworiented = flg; 1705 1706 PetscCall(MatSetOption(a->A, op, flg)); 1707 PetscCall(MatSetOption(a->B, op, flg)); 1708 break; 1709 case MAT_FORCE_DIAGONAL_ENTRIES: 1710 case MAT_SORTED_FULL: 1711 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1712 break; 1713 case MAT_IGNORE_OFF_PROC_ENTRIES: 1714 a->donotstash = flg; 1715 break; 1716 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1717 case MAT_SPD: 1718 case MAT_SYMMETRIC: 1719 case MAT_STRUCTURALLY_SYMMETRIC: 1720 case MAT_HERMITIAN: 1721 case MAT_SYMMETRY_ETERNAL: 1722 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1723 case MAT_SPD_ETERNAL: 1724 /* if the diagonal matrix is square it inherits some of the properties above */ 1725 break; 1726 case MAT_SUBMAT_SINGLEIS: 1727 A->submat_singleis = flg; 1728 break; 1729 case MAT_STRUCTURE_ONLY: 1730 /* The option is handled directly by MatSetOption() */ 1731 break; 1732 default: 1733 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1734 } 1735 PetscFunctionReturn(PETSC_SUCCESS); 1736 } 1737 1738 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1739 { 1740 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1741 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1742 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1743 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1744 PetscInt *cmap, *idx_p; 1745 1746 PetscFunctionBegin; 1747 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1748 mat->getrowactive = PETSC_TRUE; 1749 1750 if (!mat->rowvalues && (idx || v)) { 1751 /* 1752 allocate enough space to hold information from the longest row. 
1753 */ 1754 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1755 PetscInt max = 1, tmp; 1756 for (i = 0; i < matin->rmap->n; i++) { 1757 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1758 if (max < tmp) max = tmp; 1759 } 1760 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1761 } 1762 1763 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1764 lrow = row - rstart; 1765 1766 pvA = &vworkA; 1767 pcA = &cworkA; 1768 pvB = &vworkB; 1769 pcB = &cworkB; 1770 if (!v) { 1771 pvA = NULL; 1772 pvB = NULL; 1773 } 1774 if (!idx) { 1775 pcA = NULL; 1776 if (!v) pcB = NULL; 1777 } 1778 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1779 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1780 nztot = nzA + nzB; 1781 1782 cmap = mat->garray; 1783 if (v || idx) { 1784 if (nztot) { 1785 /* Sort by increasing column numbers, assuming A and B already sorted */ 1786 PetscInt imark = -1; 1787 if (v) { 1788 *v = v_p = mat->rowvalues; 1789 for (i = 0; i < nzB; i++) { 1790 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1791 else break; 1792 } 1793 imark = i; 1794 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1795 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1796 } 1797 if (idx) { 1798 *idx = idx_p = mat->rowindices; 1799 if (imark > -1) { 1800 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1801 } else { 1802 for (i = 0; i < nzB; i++) { 1803 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1804 else break; 1805 } 1806 imark = i; 1807 } 1808 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1809 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1810 } 1811 } else { 1812 if (idx) *idx = NULL; 1813 if (v) *v = NULL; 1814 } 1815 } 1816 *nz = nztot; 1817 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1818 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1819 PetscFunctionReturn(PETSC_SUCCESS); 1820 } 1821 1822 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1823 { 1824 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1825 1826 PetscFunctionBegin; 1827 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1828 aij->getrowactive = PETSC_FALSE; 1829 PetscFunctionReturn(PETSC_SUCCESS); 1830 } 1831 1832 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1833 { 1834 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1835 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1836 PetscInt i, j, cstart = mat->cmap->rstart; 1837 PetscReal sum = 0.0; 1838 const MatScalar *v, *amata, *bmata; 1839 1840 PetscFunctionBegin; 1841 if (aij->size == 1) { 1842 PetscCall(MatNorm(aij->A, type, norm)); 1843 } else { 1844 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1845 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1846 if (type == NORM_FROBENIUS) { 1847 v = amata; 1848 for (i = 0; i < amat->nz; i++) { 1849 sum += PetscRealPart(PetscConj(*v) * (*v)); 1850 v++; 1851 } 1852 v = bmata; 1853 for (i = 0; i < bmat->nz; i++) { 1854 sum += PetscRealPart(PetscConj(*v) * (*v)); 1855 v++; 1856 } 1857 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1858 *norm = PetscSqrtReal(*norm); 1859 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1860 } else if (type == NORM_1) { /* max column 
norm */ 1861 PetscReal *tmp, *tmp2; 1862 PetscInt *jj, *garray = aij->garray; 1863 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1864 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1865 *norm = 0.0; 1866 v = amata; 1867 jj = amat->j; 1868 for (j = 0; j < amat->nz; j++) { 1869 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1870 v++; 1871 } 1872 v = bmata; 1873 jj = bmat->j; 1874 for (j = 0; j < bmat->nz; j++) { 1875 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1876 v++; 1877 } 1878 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1879 for (j = 0; j < mat->cmap->N; j++) { 1880 if (tmp2[j] > *norm) *norm = tmp2[j]; 1881 } 1882 PetscCall(PetscFree(tmp)); 1883 PetscCall(PetscFree(tmp2)); 1884 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1885 } else if (type == NORM_INFINITY) { /* max row norm */ 1886 PetscReal ntemp = 0.0; 1887 for (j = 0; j < aij->A->rmap->n; j++) { 1888 v = amata + amat->i[j]; 1889 sum = 0.0; 1890 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1891 sum += PetscAbsScalar(*v); 1892 v++; 1893 } 1894 v = bmata + bmat->i[j]; 1895 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1896 sum += PetscAbsScalar(*v); 1897 v++; 1898 } 1899 if (sum > ntemp) ntemp = sum; 1900 } 1901 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1902 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1903 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1904 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1905 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1906 } 1907 PetscFunctionReturn(PETSC_SUCCESS); 1908 } 1909 1910 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1911 { 1912 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1913 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1914 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1915 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1916 Mat B, A_diag, *B_diag; 1917 const MatScalar *pbv, *bv; 1918 1919 PetscFunctionBegin; 1920 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1921 ma = A->rmap->n; 1922 na = A->cmap->n; 1923 mb = a->B->rmap->n; 1924 nb = a->B->cmap->n; 1925 ai = Aloc->i; 1926 aj = Aloc->j; 1927 bi = Bloc->i; 1928 bj = Bloc->j; 1929 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1930 PetscInt *d_nnz, *g_nnz, *o_nnz; 1931 PetscSFNode *oloc; 1932 PETSC_UNUSED PetscSF sf; 1933 1934 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1935 /* compute d_nnz for preallocation */ 1936 PetscCall(PetscArrayzero(d_nnz, na)); 1937 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1938 /* compute local off-diagonal contributions */ 1939 PetscCall(PetscArrayzero(g_nnz, nb)); 1940 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1941 /* map those to global */ 1942 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1943 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1944 PetscCall(PetscSFSetFromOptions(sf)); 1945 PetscCall(PetscArrayzero(o_nnz, na)); 1946 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1947 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1948 PetscCall(PetscSFDestroy(&sf)); 1949 1950 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1951 
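/* The transpose swaps the row and column layouts of A: local sizes become (cmap->n, rmap->n) and
     global sizes (N, M); the d_nnz/o_nnz counts computed above give its exact preallocation. */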
PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1952 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1953 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1954 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1955 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1956 } else { 1957 B = *matout; 1958 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1959 } 1960 1961 b = (Mat_MPIAIJ *)B->data; 1962 A_diag = a->A; 1963 B_diag = &b->A; 1964 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1965 A_diag_ncol = A_diag->cmap->N; 1966 B_diag_ilen = sub_B_diag->ilen; 1967 B_diag_i = sub_B_diag->i; 1968 1969 /* Set ilen for diagonal of B */ 1970 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1971 1972 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1973 very quickly (=without using MatSetValues), because all writes are local. */ 1974 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1975 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1976 1977 /* copy over the B part */ 1978 PetscCall(PetscMalloc1(bi[mb], &cols)); 1979 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1980 pbv = bv; 1981 row = A->rmap->rstart; 1982 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1983 cols_tmp = cols; 1984 for (i = 0; i < mb; i++) { 1985 ncol = bi[i + 1] - bi[i]; 1986 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1987 row++; 1988 pbv += ncol; 1989 cols_tmp += ncol; 1990 } 1991 PetscCall(PetscFree(cols)); 1992 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1993 1994 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1995 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1996 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1997 *matout = B; 1998 } else { 1999 PetscCall(MatHeaderMerge(A, &B)); 2000 } 2001 PetscFunctionReturn(PETSC_SUCCESS); 2002 } 2003 2004 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 2005 { 2006 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2007 Mat a = aij->A, b = aij->B; 2008 PetscInt s1, s2, s3; 2009 2010 PetscFunctionBegin; 2011 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 2012 if (rr) { 2013 PetscCall(VecGetLocalSize(rr, &s1)); 2014 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 2015 /* Overlap communication with computation. 
*/ 2016 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2017 } 2018 if (ll) { 2019 PetscCall(VecGetLocalSize(ll, &s1)); 2020 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 2021 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 2022 } 2023 /* scale the diagonal block */ 2024 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2025 2026 if (rr) { 2027 /* Do a scatter end and then right scale the off-diagonal block */ 2028 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2029 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2030 } 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2035 { 2036 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2037 2038 PetscFunctionBegin; 2039 PetscCall(MatSetUnfactored(a->A)); 2040 PetscFunctionReturn(PETSC_SUCCESS); 2041 } 2042 2043 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2044 { 2045 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2046 Mat a, b, c, d; 2047 PetscBool flg; 2048 2049 PetscFunctionBegin; 2050 a = matA->A; 2051 b = matA->B; 2052 c = matB->A; 2053 d = matB->B; 2054 2055 PetscCall(MatEqual(a, c, &flg)); 2056 if (flg) PetscCall(MatEqual(b, d, &flg)); 2057 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2058 PetscFunctionReturn(PETSC_SUCCESS); 2059 } 2060 2061 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2062 { 2063 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2064 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2065 2066 PetscFunctionBegin; 2067 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2068 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2069 /* because of the column compression in the off-processor part of the matrix a->B, 2070 the number of columns in a->B and b->B may be different, hence we cannot call 2071 the MatCopy() directly on the two parts. If need be, we can provide a more 2072 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2073 then copying the submatrices */ 2074 PetscCall(MatCopy_Basic(A, B, str)); 2075 } else { 2076 PetscCall(MatCopy(a->A, b->A, str)); 2077 PetscCall(MatCopy(a->B, b->B, str)); 2078 } 2079 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2080 PetscFunctionReturn(PETSC_SUCCESS); 2081 } 2082 2083 /* 2084 Computes the number of nonzeros per row needed for preallocation when X and Y 2085 have different nonzero structure. 
2086 */ 2087 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2088 { 2089 PetscInt i, j, k, nzx, nzy; 2090 2091 PetscFunctionBegin; 2092 /* Set the number of nonzeros in the new matrix */ 2093 for (i = 0; i < m; i++) { 2094 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2095 nzx = xi[i + 1] - xi[i]; 2096 nzy = yi[i + 1] - yi[i]; 2097 nnz[i] = 0; 2098 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2099 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2100 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2101 nnz[i]++; 2102 } 2103 for (; k < nzy; k++) nnz[i]++; 2104 } 2105 PetscFunctionReturn(PETSC_SUCCESS); 2106 } 2107 2108 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2109 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2110 { 2111 PetscInt m = Y->rmap->N; 2112 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2113 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2114 2115 PetscFunctionBegin; 2116 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2117 PetscFunctionReturn(PETSC_SUCCESS); 2118 } 2119 2120 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2121 { 2122 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2123 2124 PetscFunctionBegin; 2125 if (str == SAME_NONZERO_PATTERN) { 2126 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2127 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2128 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2129 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2130 } else { 2131 Mat B; 2132 PetscInt *nnz_d, *nnz_o; 2133 2134 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2135 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2136 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2137 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2138 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2139 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2140 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2141 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2142 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2143 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2144 PetscCall(MatHeaderMerge(Y, &B)); 2145 PetscCall(PetscFree(nnz_d)); 2146 PetscCall(PetscFree(nnz_o)); 2147 } 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2152 2153 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2154 { 2155 PetscFunctionBegin; 2156 if (PetscDefined(USE_COMPLEX)) { 2157 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2158 2159 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2160 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2161 } 2162 PetscFunctionReturn(PETSC_SUCCESS); 2163 } 2164 2165 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2166 { 2167 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2168 2169 PetscFunctionBegin; 2170 PetscCall(MatRealPart(a->A)); 2171 PetscCall(MatRealPart(a->B)); 2172 PetscFunctionReturn(PETSC_SUCCESS); 2173 } 2174 2175 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2176 { 2177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2178 2179 PetscFunctionBegin; 2180 PetscCall(MatImaginaryPart(a->A)); 
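/* the off-diagonal block stores its own values, so it is processed the same way */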
2181 PetscCall(MatImaginaryPart(a->B)); 2182 PetscFunctionReturn(PETSC_SUCCESS); 2183 } 2184 2185 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2186 { 2187 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2188 PetscInt i, *idxb = NULL, m = A->rmap->n; 2189 PetscScalar *va, *vv; 2190 Vec vB, vA; 2191 const PetscScalar *vb; 2192 2193 PetscFunctionBegin; 2194 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2195 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2196 2197 PetscCall(VecGetArrayWrite(vA, &va)); 2198 if (idx) { 2199 for (i = 0; i < m; i++) { 2200 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2201 } 2202 } 2203 2204 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2205 PetscCall(PetscMalloc1(m, &idxb)); 2206 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2207 2208 PetscCall(VecGetArrayWrite(v, &vv)); 2209 PetscCall(VecGetArrayRead(vB, &vb)); 2210 for (i = 0; i < m; i++) { 2211 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2212 vv[i] = vb[i]; 2213 if (idx) idx[i] = a->garray[idxb[i]]; 2214 } else { 2215 vv[i] = va[i]; 2216 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2217 } 2218 } 2219 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2220 PetscCall(VecRestoreArrayWrite(vA, &va)); 2221 PetscCall(VecRestoreArrayRead(vB, &vb)); 2222 PetscCall(PetscFree(idxb)); 2223 PetscCall(VecDestroy(&vA)); 2224 PetscCall(VecDestroy(&vB)); 2225 PetscFunctionReturn(PETSC_SUCCESS); 2226 } 2227 2228 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2229 { 2230 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2231 PetscInt m = A->rmap->n, n = A->cmap->n; 2232 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2233 PetscInt *cmap = mat->garray; 2234 PetscInt *diagIdx, *offdiagIdx; 2235 Vec diagV, offdiagV; 2236 PetscScalar *a, *diagA, *offdiagA; 2237 const PetscScalar *ba, *bav; 2238 PetscInt r, j, col, ncols, *bi, *bj; 2239 Mat B = mat->B; 2240 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2241 2242 PetscFunctionBegin; 2243 /* When a process holds entire A and other processes have no entry */ 2244 if (A->cmap->N == n) { 2245 PetscCall(VecGetArrayWrite(v, &diagA)); 2246 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2247 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2248 PetscCall(VecDestroy(&diagV)); 2249 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } else if (n == 0) { 2252 if (m) { 2253 PetscCall(VecGetArrayWrite(v, &a)); 2254 for (r = 0; r < m; r++) { 2255 a[r] = 0.0; 2256 if (idx) idx[r] = -1; 2257 } 2258 PetscCall(VecRestoreArrayWrite(v, &a)); 2259 } 2260 PetscFunctionReturn(PETSC_SUCCESS); 2261 } 2262 2263 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2264 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2265 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2266 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2267 2268 /* Get offdiagIdx[] for implicit 0.0 */ 2269 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2270 ba = bav; 2271 bi = b->i; 2272 bj = b->j; 2273 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2274 for (r = 0; r < m; r++) { 2275 ncols = bi[r + 1] - bi[r]; 2276 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2277 offdiagA[r] = *ba; 2278 offdiagIdx[r] = cmap[0]; 2279 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2280 offdiagA[r] = 0.0; 2281 2282 /* Find first hole in the cmap */ 2283 for (j = 0; j < ncols; j++) { 2284 col = cmap[bj[j]]; /* global column 
number = cmap[B column number] */ 2285 if (col > j && j < cstart) { 2286 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2287 break; 2288 } else if (col > j + n && j >= cstart) { 2289 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2290 break; 2291 } 2292 } 2293 if (j == ncols && ncols < A->cmap->N - n) { 2294 /* a hole is outside compressed Bcols */ 2295 if (ncols == 0) { 2296 if (cstart) { 2297 offdiagIdx[r] = 0; 2298 } else offdiagIdx[r] = cend; 2299 } else { /* ncols > 0 */ 2300 offdiagIdx[r] = cmap[ncols - 1] + 1; 2301 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2302 } 2303 } 2304 } 2305 2306 for (j = 0; j < ncols; j++) { 2307 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2308 offdiagA[r] = *ba; 2309 offdiagIdx[r] = cmap[*bj]; 2310 } 2311 ba++; 2312 bj++; 2313 } 2314 } 2315 2316 PetscCall(VecGetArrayWrite(v, &a)); 2317 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2318 for (r = 0; r < m; ++r) { 2319 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2320 a[r] = diagA[r]; 2321 if (idx) idx[r] = cstart + diagIdx[r]; 2322 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2323 a[r] = diagA[r]; 2324 if (idx) { 2325 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2326 idx[r] = cstart + diagIdx[r]; 2327 } else idx[r] = offdiagIdx[r]; 2328 } 2329 } else { 2330 a[r] = offdiagA[r]; 2331 if (idx) idx[r] = offdiagIdx[r]; 2332 } 2333 } 2334 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2335 PetscCall(VecRestoreArrayWrite(v, &a)); 2336 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2337 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2338 PetscCall(VecDestroy(&diagV)); 2339 PetscCall(VecDestroy(&offdiagV)); 2340 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2341 PetscFunctionReturn(PETSC_SUCCESS); 2342 } 2343 2344 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2345 { 2346 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2347 PetscInt m = A->rmap->n, n = A->cmap->n; 2348 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2349 PetscInt *cmap = mat->garray; 2350 PetscInt *diagIdx, *offdiagIdx; 2351 Vec diagV, offdiagV; 2352 PetscScalar *a, *diagA, *offdiagA; 2353 const PetscScalar *ba, *bav; 2354 PetscInt r, j, col, ncols, *bi, *bj; 2355 Mat B = mat->B; 2356 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2357 2358 PetscFunctionBegin; 2359 /* When a process holds entire A and other processes have no entry */ 2360 if (A->cmap->N == n) { 2361 PetscCall(VecGetArrayWrite(v, &diagA)); 2362 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2363 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2364 PetscCall(VecDestroy(&diagV)); 2365 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } else if (n == 0) { 2368 if (m) { 2369 PetscCall(VecGetArrayWrite(v, &a)); 2370 for (r = 0; r < m; r++) { 2371 a[r] = PETSC_MAX_REAL; 2372 if (idx) idx[r] = -1; 2373 } 2374 PetscCall(VecRestoreArrayWrite(v, &a)); 2375 } 2376 PetscFunctionReturn(PETSC_SUCCESS); 2377 } 2378 2379 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2380 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2381 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2382 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2383 2384 /* Get offdiagIdx[] for implicit 0.0 */ 2385 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2386 ba = bav; 2387 bi = b->i; 2388 bj = b->j; 2389 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2390 for (r = 0; r < m; r++) { 2391 
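/* Scan the compressed off-diagonal row: its minimum is either an explicit entry or an implicit
       0.0 at a column dropped by compression, so the first such hole in cmap[] is recorded as the
       default index before the stored values are compared. */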
ncols = bi[r + 1] - bi[r]; 2392 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2393 offdiagA[r] = *ba; 2394 offdiagIdx[r] = cmap[0]; 2395 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2396 offdiagA[r] = 0.0; 2397 2398 /* Find first hole in the cmap */ 2399 for (j = 0; j < ncols; j++) { 2400 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2401 if (col > j && j < cstart) { 2402 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2403 break; 2404 } else if (col > j + n && j >= cstart) { 2405 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2406 break; 2407 } 2408 } 2409 if (j == ncols && ncols < A->cmap->N - n) { 2410 /* a hole is outside compressed Bcols */ 2411 if (ncols == 0) { 2412 if (cstart) { 2413 offdiagIdx[r] = 0; 2414 } else offdiagIdx[r] = cend; 2415 } else { /* ncols > 0 */ 2416 offdiagIdx[r] = cmap[ncols - 1] + 1; 2417 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2418 } 2419 } 2420 } 2421 2422 for (j = 0; j < ncols; j++) { 2423 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2424 offdiagA[r] = *ba; 2425 offdiagIdx[r] = cmap[*bj]; 2426 } 2427 ba++; 2428 bj++; 2429 } 2430 } 2431 2432 PetscCall(VecGetArrayWrite(v, &a)); 2433 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2434 for (r = 0; r < m; ++r) { 2435 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2436 a[r] = diagA[r]; 2437 if (idx) idx[r] = cstart + diagIdx[r]; 2438 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2439 a[r] = diagA[r]; 2440 if (idx) { 2441 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2442 idx[r] = cstart + diagIdx[r]; 2443 } else idx[r] = offdiagIdx[r]; 2444 } 2445 } else { 2446 a[r] = offdiagA[r]; 2447 if (idx) idx[r] = offdiagIdx[r]; 2448 } 2449 } 2450 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2451 PetscCall(VecRestoreArrayWrite(v, &a)); 2452 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2453 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2454 PetscCall(VecDestroy(&diagV)); 2455 PetscCall(VecDestroy(&offdiagV)); 2456 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2457 PetscFunctionReturn(PETSC_SUCCESS); 2458 } 2459 2460 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2461 { 2462 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2463 PetscInt m = A->rmap->n, n = A->cmap->n; 2464 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2465 PetscInt *cmap = mat->garray; 2466 PetscInt *diagIdx, *offdiagIdx; 2467 Vec diagV, offdiagV; 2468 PetscScalar *a, *diagA, *offdiagA; 2469 const PetscScalar *ba, *bav; 2470 PetscInt r, j, col, ncols, *bi, *bj; 2471 Mat B = mat->B; 2472 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2473 2474 PetscFunctionBegin; 2475 /* When a process holds entire A and other processes have no entry */ 2476 if (A->cmap->N == n) { 2477 PetscCall(VecGetArrayWrite(v, &diagA)); 2478 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2479 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2480 PetscCall(VecDestroy(&diagV)); 2481 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2482 PetscFunctionReturn(PETSC_SUCCESS); 2483 } else if (n == 0) { 2484 if (m) { 2485 PetscCall(VecGetArrayWrite(v, &a)); 2486 for (r = 0; r < m; r++) { 2487 a[r] = PETSC_MIN_REAL; 2488 if (idx) idx[r] = -1; 2489 } 2490 PetscCall(VecRestoreArrayWrite(v, &a)); 2491 } 2492 PetscFunctionReturn(PETSC_SUCCESS); 2493 } 2494 2495 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2496 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 
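/* Row maxima are computed separately for the diagonal block (diagV) and the compressed
     off-diagonal block (offdiagV), then merged below; on ties the smaller global column index wins. */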
2497 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2498 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2499 2500 /* Get offdiagIdx[] for implicit 0.0 */ 2501 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2502 ba = bav; 2503 bi = b->i; 2504 bj = b->j; 2505 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2506 for (r = 0; r < m; r++) { 2507 ncols = bi[r + 1] - bi[r]; 2508 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2509 offdiagA[r] = *ba; 2510 offdiagIdx[r] = cmap[0]; 2511 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2512 offdiagA[r] = 0.0; 2513 2514 /* Find first hole in the cmap */ 2515 for (j = 0; j < ncols; j++) { 2516 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2517 if (col > j && j < cstart) { 2518 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2519 break; 2520 } else if (col > j + n && j >= cstart) { 2521 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2522 break; 2523 } 2524 } 2525 if (j == ncols && ncols < A->cmap->N - n) { 2526 /* a hole is outside compressed Bcols */ 2527 if (ncols == 0) { 2528 if (cstart) { 2529 offdiagIdx[r] = 0; 2530 } else offdiagIdx[r] = cend; 2531 } else { /* ncols > 0 */ 2532 offdiagIdx[r] = cmap[ncols - 1] + 1; 2533 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2534 } 2535 } 2536 } 2537 2538 for (j = 0; j < ncols; j++) { 2539 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2540 offdiagA[r] = *ba; 2541 offdiagIdx[r] = cmap[*bj]; 2542 } 2543 ba++; 2544 bj++; 2545 } 2546 } 2547 2548 PetscCall(VecGetArrayWrite(v, &a)); 2549 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2550 for (r = 0; r < m; ++r) { 2551 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2552 a[r] = diagA[r]; 2553 if (idx) idx[r] = cstart + diagIdx[r]; 2554 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2555 a[r] = diagA[r]; 2556 if (idx) { 2557 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2558 idx[r] = cstart + diagIdx[r]; 2559 } else idx[r] = offdiagIdx[r]; 2560 } 2561 } else { 2562 a[r] = offdiagA[r]; 2563 if (idx) idx[r] = offdiagIdx[r]; 2564 } 2565 } 2566 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2567 PetscCall(VecRestoreArrayWrite(v, &a)); 2568 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2569 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2570 PetscCall(VecDestroy(&diagV)); 2571 PetscCall(VecDestroy(&offdiagV)); 2572 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2573 PetscFunctionReturn(PETSC_SUCCESS); 2574 } 2575 2576 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2577 { 2578 Mat *dummy; 2579 2580 PetscFunctionBegin; 2581 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2582 *newmat = *dummy; 2583 PetscCall(PetscFree(dummy)); 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2588 { 2589 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2590 2591 PetscFunctionBegin; 2592 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2593 A->factorerrortype = a->A->factorerrortype; 2594 PetscFunctionReturn(PETSC_SUCCESS); 2595 } 2596 2597 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2598 { 2599 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2600 2601 PetscFunctionBegin; 2602 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ 
is not allowed"); 2603 PetscCall(MatSetRandom(aij->A, rctx)); 2604 if (x->assembled) { 2605 PetscCall(MatSetRandom(aij->B, rctx)); 2606 } else { 2607 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2608 } 2609 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2610 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2611 PetscFunctionReturn(PETSC_SUCCESS); 2612 } 2613 2614 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2615 { 2616 PetscFunctionBegin; 2617 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2618 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2619 PetscFunctionReturn(PETSC_SUCCESS); 2620 } 2621 2622 /*@ 2623 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2624 2625 Not Collective 2626 2627 Input Parameter: 2628 . A - the matrix 2629 2630 Output Parameter: 2631 . nz - the number of nonzeros 2632 2633 Level: advanced 2634 2635 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `Mat` 2636 @*/ 2637 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2638 { 2639 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2640 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2641 2642 PetscFunctionBegin; 2643 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2644 PetscFunctionReturn(PETSC_SUCCESS); 2645 } 2646 2647 /*@ 2648 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2649 2650 Collective 2651 2652 Input Parameters: 2653 + A - the matrix 2654 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2655 2656 Level: advanced 2657 2658 .seealso: [](chapter_matrices), `Mat`, `Mat`, `MATMPIAIJ` 2659 @*/ 2660 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2661 { 2662 PetscFunctionBegin; 2663 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2664 PetscFunctionReturn(PETSC_SUCCESS); 2665 } 2666 2667 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2668 { 2669 PetscBool sc = PETSC_FALSE, flg; 2670 2671 PetscFunctionBegin; 2672 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2673 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2674 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2675 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2676 PetscOptionsHeadEnd(); 2677 PetscFunctionReturn(PETSC_SUCCESS); 2678 } 2679 2680 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2681 { 2682 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2683 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2684 2685 PetscFunctionBegin; 2686 if (!Y->preallocated) { 2687 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2688 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2689 PetscInt nonew = aij->nonew; 2690 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2691 aij->nonew = nonew; 2692 } 2693 PetscCall(MatShift_Basic(Y, a)); 2694 PetscFunctionReturn(PETSC_SUCCESS); 2695 } 2696 2697 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2698 { 2699 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2700 2701 PetscFunctionBegin; 2702 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2703 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2704 if (d) { 2705 PetscInt rstart; 2706 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2707 *d += rstart; 2708 } 2709 PetscFunctionReturn(PETSC_SUCCESS); 2710 } 2711 2712 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2713 { 2714 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2715 2716 PetscFunctionBegin; 2717 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2718 PetscFunctionReturn(PETSC_SUCCESS); 2719 } 2720 2721 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A) 2722 { 2723 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2724 2725 PetscFunctionBegin; 2726 PetscCall(MatEliminateZeros(a->A)); 2727 PetscCall(MatEliminateZeros(a->B)); 2728 PetscFunctionReturn(PETSC_SUCCESS); 2729 } 2730 2731 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2732 MatGetRow_MPIAIJ, 2733 MatRestoreRow_MPIAIJ, 2734 MatMult_MPIAIJ, 2735 /* 4*/ MatMultAdd_MPIAIJ, 2736 MatMultTranspose_MPIAIJ, 2737 MatMultTransposeAdd_MPIAIJ, 2738 NULL, 2739 NULL, 2740 NULL, 2741 /*10*/ NULL, 2742 NULL, 2743 NULL, 2744 MatSOR_MPIAIJ, 2745 MatTranspose_MPIAIJ, 2746 /*15*/ MatGetInfo_MPIAIJ, 2747 MatEqual_MPIAIJ, 2748 MatGetDiagonal_MPIAIJ, 2749 MatDiagonalScale_MPIAIJ, 2750 MatNorm_MPIAIJ, 2751 /*20*/ MatAssemblyBegin_MPIAIJ, 2752 MatAssemblyEnd_MPIAIJ, 2753 MatSetOption_MPIAIJ, 2754 MatZeroEntries_MPIAIJ, 2755 /*24*/ MatZeroRows_MPIAIJ, 2756 NULL, 2757 NULL, 2758 NULL, 2759 NULL, 2760 /*29*/ MatSetUp_MPI_Hash, 2761 NULL, 2762 NULL, 2763 MatGetDiagonalBlock_MPIAIJ, 2764 NULL, 2765 /*34*/ MatDuplicate_MPIAIJ, 2766 NULL, 2767 NULL, 2768 NULL, 2769 NULL, 2770 /*39*/ MatAXPY_MPIAIJ, 2771 MatCreateSubMatrices_MPIAIJ, 2772 MatIncreaseOverlap_MPIAIJ, 2773 MatGetValues_MPIAIJ, 2774 MatCopy_MPIAIJ, 2775 /*44*/ MatGetRowMax_MPIAIJ, 2776 MatScale_MPIAIJ, 2777 MatShift_MPIAIJ, 2778 MatDiagonalSet_MPIAIJ, 2779 MatZeroRowsColumns_MPIAIJ, 2780 /*49*/ MatSetRandom_MPIAIJ, 2781 MatGetRowIJ_MPIAIJ, 2782 MatRestoreRowIJ_MPIAIJ, 2783 NULL, 2784 NULL, 2785 /*54*/ MatFDColoringCreate_MPIXAIJ, 2786 NULL, 2787 MatSetUnfactored_MPIAIJ, 2788 MatPermute_MPIAIJ, 2789 NULL, 2790 /*59*/ MatCreateSubMatrix_MPIAIJ, 2791 MatDestroy_MPIAIJ, 2792 MatView_MPIAIJ, 2793 NULL, 2794 NULL, 2795 /*64*/ NULL, 2796 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2797 NULL, 2798 NULL, 2799 NULL, 2800 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2801 MatGetRowMinAbs_MPIAIJ, 2802 NULL, 2803 NULL, 2804 NULL, 2805 NULL, 2806 /*75*/ MatFDColoringApply_AIJ, 2807 MatSetFromOptions_MPIAIJ, 2808 NULL, 2809 NULL, 2810 MatFindZeroDiagonals_MPIAIJ, 2811 /*80*/ NULL, 2812 NULL, 2813 NULL, 2814 /*83*/ MatLoad_MPIAIJ, 2815 MatIsSymmetric_MPIAIJ, 2816 NULL, 2817 NULL, 2818 NULL, 2819 NULL, 2820 /*89*/ NULL, 2821 NULL, 2822 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2823 NULL, 2824 NULL, 2825 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2826 NULL, 2827 NULL, 2828 NULL, 2829 MatBindToCPU_MPIAIJ, 2830 /*99*/ MatProductSetFromOptions_MPIAIJ, 2831 NULL, 2832 NULL, 2833 MatConjugate_MPIAIJ, 2834 NULL, 
2835 /*104*/ MatSetValuesRow_MPIAIJ, 2836 MatRealPart_MPIAIJ, 2837 MatImaginaryPart_MPIAIJ, 2838 NULL, 2839 NULL, 2840 /*109*/ NULL, 2841 NULL, 2842 MatGetRowMin_MPIAIJ, 2843 NULL, 2844 MatMissingDiagonal_MPIAIJ, 2845 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2846 NULL, 2847 MatGetGhosts_MPIAIJ, 2848 NULL, 2849 NULL, 2850 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2851 NULL, 2852 NULL, 2853 NULL, 2854 MatGetMultiProcBlock_MPIAIJ, 2855 /*124*/ MatFindNonzeroRows_MPIAIJ, 2856 MatGetColumnReductions_MPIAIJ, 2857 MatInvertBlockDiagonal_MPIAIJ, 2858 MatInvertVariableBlockDiagonal_MPIAIJ, 2859 MatCreateSubMatricesMPI_MPIAIJ, 2860 /*129*/ NULL, 2861 NULL, 2862 NULL, 2863 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2864 NULL, 2865 /*134*/ NULL, 2866 NULL, 2867 NULL, 2868 NULL, 2869 NULL, 2870 /*139*/ MatSetBlockSizes_MPIAIJ, 2871 NULL, 2872 NULL, 2873 MatFDColoringSetUp_MPIXAIJ, 2874 MatFindOffBlockDiagonalEntries_MPIAIJ, 2875 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2876 /*145*/ NULL, 2877 NULL, 2878 NULL, 2879 MatCreateGraph_Simple_AIJ, 2880 NULL, 2881 /*150*/ NULL, 2882 MatEliminateZeros_MPIAIJ}; 2883 2884 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2885 { 2886 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2887 2888 PetscFunctionBegin; 2889 PetscCall(MatStoreValues(aij->A)); 2890 PetscCall(MatStoreValues(aij->B)); 2891 PetscFunctionReturn(PETSC_SUCCESS); 2892 } 2893 2894 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2895 { 2896 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2897 2898 PetscFunctionBegin; 2899 PetscCall(MatRetrieveValues(aij->A)); 2900 PetscCall(MatRetrieveValues(aij->B)); 2901 PetscFunctionReturn(PETSC_SUCCESS); 2902 } 2903 2904 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2905 { 2906 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2907 PetscMPIInt size; 2908 2909 PetscFunctionBegin; 2910 if (B->hash_active) { 2911 PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops)))); 2912 B->hash_active = PETSC_FALSE; 2913 } 2914 PetscCall(PetscLayoutSetUp(B->rmap)); 2915 PetscCall(PetscLayoutSetUp(B->cmap)); 2916 2917 #if defined(PETSC_USE_CTABLE) 2918 PetscCall(PetscHMapIDestroy(&b->colmap)); 2919 #else 2920 PetscCall(PetscFree(b->colmap)); 2921 #endif 2922 PetscCall(PetscFree(b->garray)); 2923 PetscCall(VecDestroy(&b->lvec)); 2924 PetscCall(VecScatterDestroy(&b->Mvctx)); 2925 2926 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2927 PetscCall(MatDestroy(&b->B)); 2928 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2929 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2930 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2931 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2932 2933 PetscCall(MatDestroy(&b->A)); 2934 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2935 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2936 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2937 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2938 2939 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2940 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2941 B->preallocated = PETSC_TRUE; 2942 B->was_assembled = PETSC_FALSE; 2943 B->assembled = PETSC_FALSE; 2944 PetscFunctionReturn(PETSC_SUCCESS); 2945 } 2946 2947 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2948 { 2949 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2950 2951 PetscFunctionBegin; 2952 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2953 PetscCall(PetscLayoutSetUp(B->rmap)); 2954 PetscCall(PetscLayoutSetUp(B->cmap)); 2955 2956 #if defined(PETSC_USE_CTABLE) 2957 PetscCall(PetscHMapIDestroy(&b->colmap)); 2958 #else 2959 PetscCall(PetscFree(b->colmap)); 2960 #endif 2961 PetscCall(PetscFree(b->garray)); 2962 PetscCall(VecDestroy(&b->lvec)); 2963 PetscCall(VecScatterDestroy(&b->Mvctx)); 2964 2965 PetscCall(MatResetPreallocation(b->A)); 2966 PetscCall(MatResetPreallocation(b->B)); 2967 B->preallocated = PETSC_TRUE; 2968 B->was_assembled = PETSC_FALSE; 2969 B->assembled = PETSC_FALSE; 2970 PetscFunctionReturn(PETSC_SUCCESS); 2971 } 2972 2973 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2974 { 2975 Mat mat; 2976 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2977 2978 PetscFunctionBegin; 2979 *newmat = NULL; 2980 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2981 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2982 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2983 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2984 a = (Mat_MPIAIJ *)mat->data; 2985 2986 mat->factortype = matin->factortype; 2987 mat->assembled = matin->assembled; 2988 mat->insertmode = NOT_SET_VALUES; 2989 mat->preallocated = matin->preallocated; 2990 2991 a->size = oldmat->size; 2992 a->rank = oldmat->rank; 2993 a->donotstash = oldmat->donotstash; 2994 a->roworiented = oldmat->roworiented; 2995 a->rowindices = NULL; 2996 a->rowvalues = NULL; 2997 a->getrowactive = PETSC_FALSE; 2998 2999 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3000 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3001 3002 if (oldmat->colmap) { 3003 #if defined(PETSC_USE_CTABLE) 3004 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3005 #else 3006 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3007 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3008 #endif 3009 } else a->colmap = NULL; 3010 if (oldmat->garray) { 3011 PetscInt len; 3012 len = oldmat->B->cmap->n; 3013 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3014 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3015 } else a->garray = NULL; 3016 3017 /* It may happen MatDuplicate is called with a non-assembled matrix 3018 In fact, MatDuplicate only requires the matrix to be preallocated 3019 This may happen inside a DMCreateMatrix_Shell */ 3020 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3021 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3022 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3023 PetscCall(MatDuplicate(oldmat->B, 
cpvalues, &a->B)); 3024 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3025 *newmat = mat; 3026 PetscFunctionReturn(PETSC_SUCCESS); 3027 } 3028 3029 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3030 { 3031 PetscBool isbinary, ishdf5; 3032 3033 PetscFunctionBegin; 3034 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3035 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3036 /* force binary viewer to load .info file if it has not yet done so */ 3037 PetscCall(PetscViewerSetUp(viewer)); 3038 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3039 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3040 if (isbinary) { 3041 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3042 } else if (ishdf5) { 3043 #if defined(PETSC_HAVE_HDF5) 3044 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3045 #else 3046 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3047 #endif 3048 } else { 3049 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3050 } 3051 PetscFunctionReturn(PETSC_SUCCESS); 3052 } 3053 3054 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3055 { 3056 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3057 PetscInt *rowidxs, *colidxs; 3058 PetscScalar *matvals; 3059 3060 PetscFunctionBegin; 3061 PetscCall(PetscViewerSetUp(viewer)); 3062 3063 /* read in matrix header */ 3064 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3065 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3066 M = header[1]; 3067 N = header[2]; 3068 nz = header[3]; 3069 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3070 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3071 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3072 3073 /* set block sizes from the viewer's .info file */ 3074 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3075 /* set global sizes if not set already */ 3076 if (mat->rmap->N < 0) mat->rmap->N = M; 3077 if (mat->cmap->N < 0) mat->cmap->N = N; 3078 PetscCall(PetscLayoutSetUp(mat->rmap)); 3079 PetscCall(PetscLayoutSetUp(mat->cmap)); 3080 3081 /* check if the matrix sizes are correct */ 3082 PetscCall(MatGetSize(mat, &rows, &cols)); 3083 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3084 3085 /* read in row lengths and build row indices */ 3086 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3087 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3088 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3089 rowidxs[0] = 0; 3090 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3091 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3092 PetscCheck(sum == nz, 
PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3093 /* read in column indices and matrix values */ 3094 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3095 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3096 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3097 /* store matrix indices and values */ 3098 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3099 PetscCall(PetscFree(rowidxs)); 3100 PetscCall(PetscFree2(colidxs, matvals)); 3101 PetscFunctionReturn(PETSC_SUCCESS); 3102 } 3103 3104 /* Not scalable because of ISAllGather() unless getting all columns. */ 3105 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3106 { 3107 IS iscol_local; 3108 PetscBool isstride; 3109 PetscMPIInt lisstride = 0, gisstride; 3110 3111 PetscFunctionBegin; 3112 /* check if we are grabbing all columns*/ 3113 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3114 3115 if (isstride) { 3116 PetscInt start, len, mstart, mlen; 3117 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3118 PetscCall(ISGetLocalSize(iscol, &len)); 3119 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3120 if (mstart == start && mlen - mstart == len) lisstride = 1; 3121 } 3122 3123 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3124 if (gisstride) { 3125 PetscInt N; 3126 PetscCall(MatGetSize(mat, NULL, &N)); 3127 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3128 PetscCall(ISSetIdentity(iscol_local)); 3129 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3130 } else { 3131 PetscInt cbs; 3132 PetscCall(ISGetBlockSize(iscol, &cbs)); 3133 PetscCall(ISAllGather(iscol, &iscol_local)); 3134 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3135 } 3136 3137 *isseq = iscol_local; 3138 PetscFunctionReturn(PETSC_SUCCESS); 3139 } 3140 3141 /* 3142 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3143 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3144 3145 Input Parameters: 3146 + mat - matrix 3147 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3148 i.e., mat->rstart <= isrow[i] < mat->rend 3149 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3150 i.e., mat->cstart <= iscol[i] < mat->cend 3151 3152 Output Parameters: 3153 + isrow_d - sequential row index set for retrieving mat->A 3154 . iscol_d - sequential column index set for retrieving mat->A 3155 . 
iscol_o - sequential column index set for retrieving mat->B 3156 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3157 */ 3158 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3159 { 3160 Vec x, cmap; 3161 const PetscInt *is_idx; 3162 PetscScalar *xarray, *cmaparray; 3163 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3164 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3165 Mat B = a->B; 3166 Vec lvec = a->lvec, lcmap; 3167 PetscInt i, cstart, cend, Bn = B->cmap->N; 3168 MPI_Comm comm; 3169 VecScatter Mvctx = a->Mvctx; 3170 3171 PetscFunctionBegin; 3172 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3173 PetscCall(ISGetLocalSize(iscol, &ncols)); 3174 3175 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3176 PetscCall(MatCreateVecs(mat, &x, NULL)); 3177 PetscCall(VecSet(x, -1.0)); 3178 PetscCall(VecDuplicate(x, &cmap)); 3179 PetscCall(VecSet(cmap, -1.0)); 3180 3181 /* Get start indices */ 3182 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3183 isstart -= ncols; 3184 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3185 3186 PetscCall(ISGetIndices(iscol, &is_idx)); 3187 PetscCall(VecGetArray(x, &xarray)); 3188 PetscCall(VecGetArray(cmap, &cmaparray)); 3189 PetscCall(PetscMalloc1(ncols, &idx)); 3190 for (i = 0; i < ncols; i++) { 3191 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3192 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3193 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3194 } 3195 PetscCall(VecRestoreArray(x, &xarray)); 3196 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3197 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3198 3199 /* Get iscol_d */ 3200 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3201 PetscCall(ISGetBlockSize(iscol, &i)); 3202 PetscCall(ISSetBlockSize(*iscol_d, i)); 3203 3204 /* Get isrow_d */ 3205 PetscCall(ISGetLocalSize(isrow, &m)); 3206 rstart = mat->rmap->rstart; 3207 PetscCall(PetscMalloc1(m, &idx)); 3208 PetscCall(ISGetIndices(isrow, &is_idx)); 3209 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3210 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3211 3212 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3213 PetscCall(ISGetBlockSize(isrow, &i)); 3214 PetscCall(ISSetBlockSize(*isrow_d, i)); 3215 3216 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3217 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3218 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3219 3220 PetscCall(VecDuplicate(lvec, &lcmap)); 3221 3222 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3223 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3224 3225 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3226 /* off-process column indices */ 3227 count = 0; 3228 PetscCall(PetscMalloc1(Bn, &idx)); 3229 PetscCall(PetscMalloc1(Bn, &cmap1)); 3230 3231 PetscCall(VecGetArray(lvec, &xarray)); 3232 PetscCall(VecGetArray(lcmap, &cmaparray)); 3233 for (i = 0; i < Bn; i++) { 3234 if (PetscRealPart(xarray[i]) > -1.0) { 3235 idx[count] = i; /* local column index in off-diagonal part B */ 3236 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3237 count++; 
3238 } 3239 } 3240 PetscCall(VecRestoreArray(lvec, &xarray)); 3241 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3242 3243 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3244 /* cannot ensure iscol_o has same blocksize as iscol! */ 3245 3246 PetscCall(PetscFree(idx)); 3247 *garray = cmap1; 3248 3249 PetscCall(VecDestroy(&x)); 3250 PetscCall(VecDestroy(&cmap)); 3251 PetscCall(VecDestroy(&lcmap)); 3252 PetscFunctionReturn(PETSC_SUCCESS); 3253 } 3254 3255 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3256 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3257 { 3258 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3259 Mat M = NULL; 3260 MPI_Comm comm; 3261 IS iscol_d, isrow_d, iscol_o; 3262 Mat Asub = NULL, Bsub = NULL; 3263 PetscInt n; 3264 3265 PetscFunctionBegin; 3266 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3267 3268 if (call == MAT_REUSE_MATRIX) { 3269 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3270 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3271 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3272 3273 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3274 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3275 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3277 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3278 3279 /* Update diagonal and off-diagonal portions of submat */ 3280 asub = (Mat_MPIAIJ *)(*submat)->data; 3281 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3282 PetscCall(ISGetLocalSize(iscol_o, &n)); 3283 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3284 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3285 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3286 3287 } else { /* call == MAT_INITIAL_MATRIX) */ 3288 const PetscInt *garray; 3289 PetscInt BsubN; 3290 3291 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3292 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3293 3294 /* Create local submatrices Asub and Bsub */ 3295 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3296 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3297 3298 /* Create submatrix M */ 3299 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3300 3301 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3302 asub = (Mat_MPIAIJ *)M->data; 3303 3304 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3305 n = asub->B->cmap->N; 3306 if (BsubN > n) { 3307 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3308 const PetscInt *idx; 3309 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3310 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3311 3312 PetscCall(PetscMalloc1(n, &idx_new)); 3313 j = 0; 3314 PetscCall(ISGetIndices(iscol_o, &idx)); 3315 for (i = 0; i < n; i++) { 3316 if (j >= BsubN) break; 3317 while (subgarray[i] > garray[j]) j++; 3318 3319 if (subgarray[i] == garray[j]) { 3320 idx_new[i] = idx[j++]; 3321 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3322 } 3323 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3324 3325 PetscCall(ISDestroy(&iscol_o)); 3326 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3327 3328 } else if (BsubN < n) { 3329 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3330 } 3331 3332 PetscCall(PetscFree(garray)); 3333 *submat = M; 3334 3335 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3336 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3337 PetscCall(ISDestroy(&isrow_d)); 3338 3339 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3340 PetscCall(ISDestroy(&iscol_d)); 3341 3342 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3343 PetscCall(ISDestroy(&iscol_o)); 3344 } 3345 PetscFunctionReturn(PETSC_SUCCESS); 3346 } 3347 3348 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3349 { 3350 IS iscol_local = NULL, isrow_d; 3351 PetscInt csize; 3352 PetscInt n, i, j, start, end; 3353 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3354 MPI_Comm comm; 3355 3356 PetscFunctionBegin; 3357 /* If isrow has same processor distribution as mat, 3358 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3359 if (call == MAT_REUSE_MATRIX) { 3360 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3361 if (isrow_d) { 3362 sameRowDist = PETSC_TRUE; 3363 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3364 } else { 3365 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3366 if (iscol_local) { 3367 sameRowDist = PETSC_TRUE; 3368 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3369 } 3370 } 3371 } else { 3372 /* Check if isrow has same processor distribution as mat */ 3373 sameDist[0] = PETSC_FALSE; 3374 
PetscCall(ISGetLocalSize(isrow, &n)); 3375 if (!n) { 3376 sameDist[0] = PETSC_TRUE; 3377 } else { 3378 PetscCall(ISGetMinMax(isrow, &i, &j)); 3379 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3380 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3381 } 3382 3383 /* Check if iscol has same processor distribution as mat */ 3384 sameDist[1] = PETSC_FALSE; 3385 PetscCall(ISGetLocalSize(iscol, &n)); 3386 if (!n) { 3387 sameDist[1] = PETSC_TRUE; 3388 } else { 3389 PetscCall(ISGetMinMax(iscol, &i, &j)); 3390 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3391 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3392 } 3393 3394 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3395 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3396 sameRowDist = tsameDist[0]; 3397 } 3398 3399 if (sameRowDist) { 3400 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3401 /* isrow and iscol have same processor distribution as mat */ 3402 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3403 PetscFunctionReturn(PETSC_SUCCESS); 3404 } else { /* sameRowDist */ 3405 /* isrow has same processor distribution as mat */ 3406 if (call == MAT_INITIAL_MATRIX) { 3407 PetscBool sorted; 3408 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3409 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3410 PetscCall(ISGetSize(iscol, &i)); 3411 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3412 3413 PetscCall(ISSorted(iscol_local, &sorted)); 3414 if (sorted) { 3415 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3416 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3417 PetscFunctionReturn(PETSC_SUCCESS); 3418 } 3419 } else { /* call == MAT_REUSE_MATRIX */ 3420 IS iscol_sub; 3421 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3422 if (iscol_sub) { 3423 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3424 PetscFunctionReturn(PETSC_SUCCESS); 3425 } 3426 } 3427 } 3428 } 3429 3430 /* General case: iscol -> iscol_local which has global size of iscol */ 3431 if (call == MAT_REUSE_MATRIX) { 3432 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3433 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3434 } else { 3435 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3436 } 3437 3438 PetscCall(ISGetLocalSize(iscol, &csize)); 3439 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3440 3441 if (call == MAT_INITIAL_MATRIX) { 3442 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3443 PetscCall(ISDestroy(&iscol_local)); 3444 } 3445 PetscFunctionReturn(PETSC_SUCCESS); 3446 } 3447 3448 /*@C 3449 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3450 and "off-diagonal" part of the matrix in CSR format. 3451 3452 Collective 3453 3454 Input Parameters: 3455 + comm - MPI communicator 3456 . A - "diagonal" portion of matrix 3457 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3458 - garray - global index of `B` columns 3459 3460 Output Parameter: 3461 . mat - the matrix, with input `A` as its local diagonal matrix 3462 3463 Level: advanced 3464 3465 Notes: 3466 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3467 3468 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3469 3470 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3471 @*/ 3472 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3473 { 3474 Mat_MPIAIJ *maij; 3475 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3476 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3477 const PetscScalar *oa; 3478 Mat Bnew; 3479 PetscInt m, n, N; 3480 MatType mpi_mat_type; 3481 3482 PetscFunctionBegin; 3483 PetscCall(MatCreate(comm, mat)); 3484 PetscCall(MatGetSize(A, &m, &n)); 3485 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3486 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3487 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3488 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3489 3490 /* Get global columns of mat */ 3491 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3492 3493 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3494 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3495 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3496 PetscCall(MatSetType(*mat, mpi_mat_type)); 3497 3498 PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3499 maij = (Mat_MPIAIJ *)(*mat)->data; 3500 3501 (*mat)->preallocated = PETSC_TRUE; 3502 3503 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3504 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3505 3506 /* Set A as diagonal portion of *mat */ 3507 maij->A = A; 3508 3509 nz = oi[m]; 3510 for (i = 0; i < nz; i++) { 3511 col = oj[i]; 3512 oj[i] = garray[col]; 3513 } 3514 3515 /* Set Bnew as off-diagonal portion of *mat */ 3516 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3517 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3518 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3519 bnew = (Mat_SeqAIJ *)Bnew->data; 3520 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3521 maij->B = Bnew; 3522 3523 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3524 3525 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3526 b->free_a = PETSC_FALSE; 3527 b->free_ij = PETSC_FALSE; 3528 PetscCall(MatDestroy(&B)); 3529 3530 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3531 bnew->free_a = PETSC_TRUE; 3532 bnew->free_ij = PETSC_TRUE; 3533 3534 /* condense columns of maij->B */ 3535 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3536 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3537 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3538 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3539 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3540 PetscFunctionReturn(PETSC_SUCCESS); 3541 } 3542 3543 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3544 3545 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3546 { 3547 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3548 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3549 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3550 Mat M, Msub, B = a->B; 3551 MatScalar *aa; 3552 Mat_SeqAIJ *aij; 3553 PetscInt *garray = a->garray, *colsub, Ncols; 3554 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3555 IS iscol_sub, iscmap; 3556 const PetscInt *is_idx, *cmap; 3557 PetscBool allcolumns = PETSC_FALSE; 3558 MPI_Comm comm; 3559 3560 PetscFunctionBegin; 3561 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3562 if (call == MAT_REUSE_MATRIX) { 3563 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3564 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3565 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3566 3567 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3568 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3569 3570 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3571 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3572 3573 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, 
&iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3574 3575 } else { /* call == MAT_INITIAL_MATRIX) */ 3576 PetscBool flg; 3577 3578 PetscCall(ISGetLocalSize(iscol, &n)); 3579 PetscCall(ISGetSize(iscol, &Ncols)); 3580 3581 /* (1) iscol -> nonscalable iscol_local */ 3582 /* Check for special case: each processor gets entire matrix columns */ 3583 PetscCall(ISIdentity(iscol_local, &flg)); 3584 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3585 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3586 if (allcolumns) { 3587 iscol_sub = iscol_local; 3588 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3589 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3590 3591 } else { 3592 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3593 PetscInt *idx, *cmap1, k; 3594 PetscCall(PetscMalloc1(Ncols, &idx)); 3595 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3596 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3597 count = 0; 3598 k = 0; 3599 for (i = 0; i < Ncols; i++) { 3600 j = is_idx[i]; 3601 if (j >= cstart && j < cend) { 3602 /* diagonal part of mat */ 3603 idx[count] = j; 3604 cmap1[count++] = i; /* column index in submat */ 3605 } else if (Bn) { 3606 /* off-diagonal part of mat */ 3607 if (j == garray[k]) { 3608 idx[count] = j; 3609 cmap1[count++] = i; /* column index in submat */ 3610 } else if (j > garray[k]) { 3611 while (j > garray[k] && k < Bn - 1) k++; 3612 if (j == garray[k]) { 3613 idx[count] = j; 3614 cmap1[count++] = i; /* column index in submat */ 3615 } 3616 } 3617 } 3618 } 3619 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3620 3621 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3622 PetscCall(ISGetBlockSize(iscol, &cbs)); 3623 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3624 3625 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3626 } 3627 3628 /* (3) Create sequential Msub */ 3629 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3630 } 3631 3632 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3633 aij = (Mat_SeqAIJ *)(Msub)->data; 3634 ii = aij->i; 3635 PetscCall(ISGetIndices(iscmap, &cmap)); 3636 3637 /* 3638 m - number of local rows 3639 Ncols - number of columns (same on all processors) 3640 rstart - first row in new global matrix generated 3641 */ 3642 PetscCall(MatGetSize(Msub, &m, NULL)); 3643 3644 if (call == MAT_INITIAL_MATRIX) { 3645 /* (4) Create parallel newmat */ 3646 PetscMPIInt rank, size; 3647 PetscInt csize; 3648 3649 PetscCallMPI(MPI_Comm_size(comm, &size)); 3650 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3651 3652 /* 3653 Determine the number of non-zeros in the diagonal and off-diagonal 3654 portions of the matrix in order to do correct preallocation 3655 */ 3656 3657 /* first get start and end of "diagonal" columns */ 3658 PetscCall(ISGetLocalSize(iscol, &csize)); 3659 if (csize == PETSC_DECIDE) { 3660 PetscCall(ISGetSize(isrow, &mglobal)); 3661 if (mglobal == Ncols) { /* square matrix */ 3662 nlocal = m; 3663 } else { 3664 nlocal = Ncols / size + ((Ncols % size) > rank); 3665 } 3666 } else { 3667 nlocal = csize; 3668 } 3669 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3670 rstart = rend - nlocal; 3671 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column 
sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3672 3673 /* next, compute all the lengths */ 3674 jj = aij->j; 3675 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3676 olens = dlens + m; 3677 for (i = 0; i < m; i++) { 3678 jend = ii[i + 1] - ii[i]; 3679 olen = 0; 3680 dlen = 0; 3681 for (j = 0; j < jend; j++) { 3682 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3683 else dlen++; 3684 jj++; 3685 } 3686 olens[i] = olen; 3687 dlens[i] = dlen; 3688 } 3689 3690 PetscCall(ISGetBlockSize(isrow, &bs)); 3691 PetscCall(ISGetBlockSize(iscol, &cbs)); 3692 3693 PetscCall(MatCreate(comm, &M)); 3694 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3695 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3696 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3697 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3698 PetscCall(PetscFree(dlens)); 3699 3700 } else { /* call == MAT_REUSE_MATRIX */ 3701 M = *newmat; 3702 PetscCall(MatGetLocalSize(M, &i, NULL)); 3703 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3704 PetscCall(MatZeroEntries(M)); 3705 /* 3706 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3707 rather than the slower MatSetValues(). 3708 */ 3709 M->was_assembled = PETSC_TRUE; 3710 M->assembled = PETSC_FALSE; 3711 } 3712 3713 /* (5) Set values of Msub to *newmat */ 3714 PetscCall(PetscMalloc1(count, &colsub)); 3715 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3716 3717 jj = aij->j; 3718 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3719 for (i = 0; i < m; i++) { 3720 row = rstart + i; 3721 nz = ii[i + 1] - ii[i]; 3722 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3723 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3724 jj += nz; 3725 aa += nz; 3726 } 3727 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3728 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3729 3730 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3731 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3732 3733 PetscCall(PetscFree(colsub)); 3734 3735 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3736 if (call == MAT_INITIAL_MATRIX) { 3737 *newmat = M; 3738 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub)); 3739 PetscCall(MatDestroy(&Msub)); 3740 3741 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub)); 3742 PetscCall(ISDestroy(&iscol_sub)); 3743 3744 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap)); 3745 PetscCall(ISDestroy(&iscmap)); 3746 3747 if (iscol_local) { 3748 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local)); 3749 PetscCall(ISDestroy(&iscol_local)); 3750 } 3751 } 3752 PetscFunctionReturn(PETSC_SUCCESS); 3753 } 3754 3755 /* 3756 Not great since it makes two copies of the submatrix, first an SeqAIJ 3757 in local and then by concatenating the local matrices the end result. 3758 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3759 3760 This requires a sequential iscol with all indices. 
3761 */ 3762 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3763 { 3764 PetscMPIInt rank, size; 3765 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3766 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3767 Mat M, Mreuse; 3768 MatScalar *aa, *vwork; 3769 MPI_Comm comm; 3770 Mat_SeqAIJ *aij; 3771 PetscBool colflag, allcolumns = PETSC_FALSE; 3772 3773 PetscFunctionBegin; 3774 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3775 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3776 PetscCallMPI(MPI_Comm_size(comm, &size)); 3777 3778 /* Check for special case: each processor gets entire matrix columns */ 3779 PetscCall(ISIdentity(iscol, &colflag)); 3780 PetscCall(ISGetLocalSize(iscol, &n)); 3781 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3782 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3783 3784 if (call == MAT_REUSE_MATRIX) { 3785 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3786 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3787 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3788 } else { 3789 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3790 } 3791 3792 /* 3793 m - number of local rows 3794 n - number of columns (same on all processors) 3795 rstart - first row in new global matrix generated 3796 */ 3797 PetscCall(MatGetSize(Mreuse, &m, &n)); 3798 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3799 if (call == MAT_INITIAL_MATRIX) { 3800 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3801 ii = aij->i; 3802 jj = aij->j; 3803 3804 /* 3805 Determine the number of non-zeros in the diagonal and off-diagonal 3806 portions of the matrix in order to do correct preallocation 3807 */ 3808 3809 /* first get start and end of "diagonal" columns */ 3810 if (csize == PETSC_DECIDE) { 3811 PetscCall(ISGetSize(isrow, &mglobal)); 3812 if (mglobal == n) { /* square matrix */ 3813 nlocal = m; 3814 } else { 3815 nlocal = n / size + ((n % size) > rank); 3816 } 3817 } else { 3818 nlocal = csize; 3819 } 3820 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3821 rstart = rend - nlocal; 3822 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3823 3824 /* next, compute all the lengths */ 3825 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3826 olens = dlens + m; 3827 for (i = 0; i < m; i++) { 3828 jend = ii[i + 1] - ii[i]; 3829 olen = 0; 3830 dlen = 0; 3831 for (j = 0; j < jend; j++) { 3832 if (*jj < rstart || *jj >= rend) olen++; 3833 else dlen++; 3834 jj++; 3835 } 3836 olens[i] = olen; 3837 dlens[i] = dlen; 3838 } 3839 PetscCall(MatCreate(comm, &M)); 3840 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3841 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3842 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3843 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3844 PetscCall(PetscFree(dlens)); 3845 } else { 3846 PetscInt ml, nl; 3847 3848 M = *newmat; 3849 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3850 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3851 PetscCall(MatZeroEntries(M)); 3852 /* 3853 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3854 rather than the slower MatSetValues(). 3855 */ 3856 M->was_assembled = PETSC_TRUE; 3857 M->assembled = PETSC_FALSE; 3858 } 3859 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3860 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3861 ii = aij->i; 3862 jj = aij->j; 3863 3864 /* trigger copy to CPU if needed */ 3865 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3866 for (i = 0; i < m; i++) { 3867 row = rstart + i; 3868 nz = ii[i + 1] - ii[i]; 3869 cwork = jj; 3870 jj += nz; 3871 vwork = aa; 3872 aa += nz; 3873 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3874 } 3875 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3876 3877 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3878 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3879 *newmat = M; 3880 3881 /* save submatrix used in processor for next request */ 3882 if (call == MAT_INITIAL_MATRIX) { 3883 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3884 PetscCall(MatDestroy(&Mreuse)); 3885 } 3886 PetscFunctionReturn(PETSC_SUCCESS); 3887 } 3888 3889 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3890 { 3891 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3892 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3893 const PetscInt *JJ; 3894 PetscBool nooffprocentries; 3895 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3896 3897 PetscFunctionBegin; 3898 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3899 3900 PetscCall(PetscLayoutSetUp(B->rmap)); 3901 PetscCall(PetscLayoutSetUp(B->cmap)); 3902 m = B->rmap->n; 3903 cstart = B->cmap->rstart; 3904 cend = B->cmap->rend; 3905 rstart = B->rmap->rstart; 3906 3907 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3908 3909 if (PetscDefined(USE_DEBUG)) { 3910 for (i = 0; i < m; i++) { 3911 nnz = Ii[i + 1] - Ii[i]; 3912 JJ = J + Ii[i]; 3913 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3914 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3915 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3916 } 3917 } 3918 3919 for (i = 0; i < m; i++) { 3920 nnz = Ii[i + 1] - Ii[i]; 3921 JJ = J + Ii[i]; 3922 nnz_max = PetscMax(nnz_max, nnz); 3923 d = 0; 3924 for (j = 0; j < nnz; j++) { 3925 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3926 } 3927 d_nnz[i] = d; 3928 o_nnz[i] = nnz - d; 3929 } 3930 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3931 PetscCall(PetscFree2(d_nnz, o_nnz)); 3932 3933 for (i = 0; i < m; i++) { 3934 ii = i + rstart; 3935 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3936 } 3937 nooffprocentries = B->nooffprocentries; 3938 B->nooffprocentries = PETSC_TRUE; 3939 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3940 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3941 B->nooffprocentries = nooffprocentries; 3942 3943 /* count number of entries below block diagonal */ 3944 PetscCall(PetscFree(Aij->ld)); 3945 PetscCall(PetscCalloc1(m, &ld)); 3946 Aij->ld = ld; 3947 for (i = 0; i < m; i++) { 3948 nnz = Ii[i + 1] - Ii[i]; 3949 j = 0; 3950 while (j < nnz && J[j] < cstart) j++; 3951 ld[i] = j; 3952 J += nnz; 3953 } 3954 3955 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3956 PetscFunctionReturn(PETSC_SUCCESS); 3957 } 3958 3959 /*@ 3960 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3961 (the default parallel PETSc format). 3962 3963 Collective 3964 3965 Input Parameters: 3966 + B - the matrix 3967 . i - the indices into j for the start of each local row (starts with zero) 3968 . j - the column indices for each local row (starts with zero) 3969 - v - optional values in the matrix 3970 3971 Level: developer 3972 3973 Notes: 3974 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3975 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3976 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3977 3978 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3979 3980 The format which is used for the sparse matrix input, is equivalent to a 3981 row-major ordering.. i.e for the following matrix, the input data expected is 3982 as shown 3983 3984 .vb 3985 1 0 0 3986 2 0 3 P0 3987 ------- 3988 4 5 6 P1 3989 3990 Process0 [P0] rows_owned=[0,1] 3991 i = {0,1,3} [size = nrow+1 = 2+1] 3992 j = {0,0,2} [size = 3] 3993 v = {1,2,3} [size = 3] 3994 3995 Process1 [P1] rows_owned=[2] 3996 i = {0,3} [size = nrow+1 = 1+1] 3997 j = {0,1,2} [size = 3] 3998 v = {4,5,6} [size = 3] 3999 .ve 4000 4001 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 4002 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 4003 @*/ 4004 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4005 { 4006 PetscFunctionBegin; 4007 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4008 PetscFunctionReturn(PETSC_SUCCESS); 4009 } 4010 4011 /*@C 4012 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4013 (the default parallel PETSc format). For good matrix assembly performance 4014 the user should preallocate the matrix storage by setting the parameters 4015 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4016 4017 Collective 4018 4019 Input Parameters: 4020 + B - the matrix 4021 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4022 (same value is used for all local rows) 4023 . d_nnz - array containing the number of nonzeros in the various rows of the 4024 DIAGONAL portion of the local submatrix (possibly different for each row) 4025 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 
4026 The size of this array is equal to the number of local rows, i.e 'm'. 4027 For matrices that will be factored, you must leave room for (and set) 4028 the diagonal entry even if it is zero. 4029 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4030 submatrix (same value is used for all local rows). 4031 - o_nnz - array containing the number of nonzeros in the various rows of the 4032 OFF-DIAGONAL portion of the local submatrix (possibly different for 4033 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4034 structure. The size of this array is equal to the number 4035 of local rows, i.e 'm'. 4036 4037 Usage: 4038 Consider the following 8x8 matrix with 34 non-zero values, that is 4039 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4040 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4041 as follows 4042 4043 .vb 4044 1 2 0 | 0 3 0 | 0 4 4045 Proc0 0 5 6 | 7 0 0 | 8 0 4046 9 0 10 | 11 0 0 | 12 0 4047 ------------------------------------- 4048 13 0 14 | 15 16 17 | 0 0 4049 Proc1 0 18 0 | 19 20 21 | 0 0 4050 0 0 0 | 22 23 0 | 24 0 4051 ------------------------------------- 4052 Proc2 25 26 27 | 0 0 28 | 29 0 4053 30 0 0 | 31 32 33 | 0 34 4054 .ve 4055 4056 This can be represented as a collection of submatrices as 4057 .vb 4058 A B C 4059 D E F 4060 G H I 4061 .ve 4062 4063 Where the submatrices A,B,C are owned by proc0, D,E,F are 4064 owned by proc1, G,H,I are owned by proc2. 4065 4066 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4067 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4068 The 'M','N' parameters are 8,8, and have the same values on all procs. 4069 4070 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4071 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4072 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4073 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4074 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4075 matrix, and [DF] as another `MATSEQAIJ` matrix. 4076 4077 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4078 allocated for every row of the local diagonal submatrix, and `o_nz` 4079 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4080 One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local 4081 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4082 In this case, the values of `d_nz`, `o_nz` are 4083 .vb 4084 proc0 d_nz = 2, o_nz = 2 4085 proc1 d_nz = 3, o_nz = 2 4086 proc2 d_nz = 1, o_nz = 4 4087 .ve 4088 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4089 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4090 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4091 34 values. 4092 4093 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4094 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4095 In the above case the values for `d_nnz`, `o_nnz` are 4096 .vb 4097 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4098 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4099 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4100 .ve 4101 Here the space allocated is the sum of all the above values, i.e. 34, and 4102 hence pre-allocation is perfect.
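   As an illustrative sketch only (assuming the 8x8 example above, and that the matrix `B` has already been created with local sizes m = 3, n = 3 on proc0), the per-row preallocation on proc0 could be supplied as
.vb
   PetscInt d_nnz[3] = {2, 2, 2}; /* nonzeros per row of the DIAGONAL block [A] on proc0 */
   PetscInt o_nnz[3] = {2, 2, 2}; /* nonzeros per row of the OFF-DIAGONAL block [BC] on proc0 */

   MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz); /* d_nz and o_nz are ignored because the arrays are given */
.ve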
4103 4104 Level: intermediate 4105 4106 Notes: 4107 If the *_nnz parameter is given then the *_nz parameter is ignored 4108 4109 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4110 storage. The stored row and column indices begin with zero. 4111 See [Sparse Matrices](sec_matsparse) for details. 4112 4113 The parallel matrix is partitioned such that the first m0 rows belong to 4114 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4115 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 4116 4117 The DIAGONAL portion of the local submatrix of a processor can be defined 4118 as the submatrix which is obtained by extracting the part corresponding to 4119 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4120 first row that belongs to the processor, r2 is the last row belonging to 4121 this processor, and c1-c2 is the range of indices of the local part of a 4122 vector suitable for applying the matrix to. This is an mxn matrix. In the 4123 common case of a square matrix, the row and column ranges are the same and 4124 the DIAGONAL part is also square. The remaining portion of the local 4125 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 4126 4127 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4128 4129 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4130 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4131 You can also run with the option `-info` and look for messages with the string 4132 malloc in them to see if additional memory allocation was needed. 4133 4134 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4135 `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()` 4136 @*/ 4137 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4138 { 4139 PetscFunctionBegin; 4140 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4141 PetscValidType(B, 1); 4142 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4143 PetscFunctionReturn(PETSC_SUCCESS); 4144 } 4145 4146 /*@ 4147 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4148 CSR format. 4149 4150 Collective 4151 4152 Input Parameters: 4153 + comm - MPI communicator 4154 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4155 . n - This value should be the same as the local size used in creating the 4156 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4157 calculated if N is given) For square matrices n is almost always m. 4158 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4159 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4160 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4161 . j - column indices 4162 - a - optional matrix values 4163 4164 Output Parameter: 4165 .
mat - the matrix 4166 4167 Level: intermediate 4168 4169 Notes: 4170 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4171 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4172 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4173 4174 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4175 4176 The format which is used for the sparse matrix input is equivalent to a 4177 row-major ordering, i.e., for the following matrix, the input data expected is 4178 as shown 4179 4180 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()` 4181 .vb 4182 1 0 0 4183 2 0 3 P0 4184 ------- 4185 4 5 6 P1 4186 4187 Process0 [P0] rows_owned=[0,1] 4188 i = {0,1,3} [size = nrow+1 = 2+1] 4189 j = {0,0,2} [size = 3] 4190 v = {1,2,3} [size = 3] 4191 4192 Process1 [P1] rows_owned=[2] 4193 i = {0,3} [size = nrow+1 = 1+1] 4194 j = {0,1,2} [size = 3] 4195 v = {4,5,6} [size = 3] 4196 .ve 4197 4198 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4199 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4200 @*/ 4201 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4202 { 4203 PetscFunctionBegin; 4204 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4205 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4206 PetscCall(MatCreate(comm, mat)); 4207 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4208 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4209 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4210 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4211 PetscFunctionReturn(PETSC_SUCCESS); 4212 } 4213 4214 /*@ 4215 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4216 CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed 4217 from `MatCreateMPIAIJWithArrays()` 4218 4219 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4220 4221 Collective 4222 4223 Input Parameters: 4224 + mat - the matrix 4225 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4226 . n - This value should be the same as the local size used in creating the 4227 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4228 calculated if N is given) For square matrices n is almost always m. 4229 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4230 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4231 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4232 .
J - column indices 4233 - v - matrix values 4234 4235 Level: deprecated 4236 4237 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4238 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4239 @*/ 4240 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4241 { 4242 PetscInt nnz, i; 4243 PetscBool nooffprocentries; 4244 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4245 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4246 PetscScalar *ad, *ao; 4247 PetscInt ldi, Iii, md; 4248 const PetscInt *Adi = Ad->i; 4249 PetscInt *ld = Aij->ld; 4250 4251 PetscFunctionBegin; 4252 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4253 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4254 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4255 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4256 4257 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4258 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4259 4260 for (i = 0; i < m; i++) { 4261 nnz = Ii[i + 1] - Ii[i]; 4262 Iii = Ii[i]; 4263 ldi = ld[i]; 4264 md = Adi[i + 1] - Adi[i]; 4265 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4266 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4267 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4268 ad += md; 4269 ao += nnz - md; 4270 } 4271 nooffprocentries = mat->nooffprocentries; 4272 mat->nooffprocentries = PETSC_TRUE; 4273 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4274 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4275 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4276 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4277 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4278 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4279 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4280 mat->nooffprocentries = nooffprocentries; 4281 PetscFunctionReturn(PETSC_SUCCESS); 4282 } 4283 4284 /*@ 4285 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4286 4287 Collective 4288 4289 Input Parameters: 4290 + mat - the matrix 4291 - v - matrix values, stored by row 4292 4293 Level: intermediate 4294 4295 Note: 4296 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4297 4298 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4299 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4300 @*/ 4301 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4302 { 4303 PetscInt nnz, i, m; 4304 PetscBool nooffprocentries; 4305 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4306 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4307 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4308 PetscScalar *ad, *ao; 4309 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 
4310 PetscInt ldi, Iii, md; 4311 PetscInt *ld = Aij->ld; 4312 4313 PetscFunctionBegin; 4314 m = mat->rmap->n; 4315 4316 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4317 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4318 Iii = 0; 4319 for (i = 0; i < m; i++) { 4320 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4321 ldi = ld[i]; 4322 md = Adi[i + 1] - Adi[i]; 4323 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4324 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4325 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4326 ad += md; 4327 ao += nnz - md; 4328 Iii += nnz; 4329 } 4330 nooffprocentries = mat->nooffprocentries; 4331 mat->nooffprocentries = PETSC_TRUE; 4332 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4333 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4334 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4335 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4336 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4337 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4338 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4339 mat->nooffprocentries = nooffprocentries; 4340 PetscFunctionReturn(PETSC_SUCCESS); 4341 } 4342 4343 /*@C 4344 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4345 (the default parallel PETSc format). For good matrix assembly performance 4346 the user should preallocate the matrix storage by setting the parameters 4347 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4348 4349 Collective 4350 4351 Input Parameters: 4352 + comm - MPI communicator 4353 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4354 This value should be the same as the local size used in creating the 4355 y vector for the matrix-vector product y = Ax. 4356 . n - This value should be the same as the local size used in creating the 4357 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4358 calculated if N is given) For square matrices n is almost always m. 4359 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4360 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4361 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4362 (same value is used for all local rows) 4363 . d_nnz - array containing the number of nonzeros in the various rows of the 4364 DIAGONAL portion of the local submatrix (possibly different for each row) 4365 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4366 The size of this array is equal to the number of local rows, i.e 'm'. 4367 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4368 submatrix (same value is used for all local rows). 4369 - o_nnz - array containing the number of nonzeros in the various rows of the 4370 OFF-DIAGONAL portion of the local submatrix (possibly different for 4371 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4372 structure. The size of this array is equal to the number 4373 of local rows, i.e 'm'. 4374 4375 Output Parameter: 4376 . A - the matrix 4377 4378 Options Database Keys: 4379 + -mat_no_inode - Do not use inodes 4380 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4381 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4382 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 
4383 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4384 4385 Level: intermediate 4386 4387 Notes: 4388 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4389 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4390 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4391 4392 If the *_nnz parameter is given then the *_nz parameter is ignored 4393 4394 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4395 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4396 storage requirements for this matrix. 4397 4398 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4399 processor than it must be used on all processors that share the object for 4400 that argument. 4401 4402 The user MUST specify either the local or global matrix dimensions 4403 (possibly both). 4404 4405 The parallel matrix is partitioned across processors such that the 4406 first m0 rows belong to process 0, the next m1 rows belong to 4407 process 1, the next m2 rows belong to process 2 etc.. where 4408 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4409 values corresponding to [m x N] submatrix. 4410 4411 The columns are logically partitioned with the n0 columns belonging 4412 to 0th partition, the next n1 columns belonging to the next 4413 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4414 4415 The DIAGONAL portion of the local submatrix on any given processor 4416 is the submatrix corresponding to the rows and columns m,n 4417 corresponding to the given processor. i.e diagonal matrix on 4418 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4419 etc. The remaining portion of the local submatrix [m x (N-n)] 4420 constitute the OFF-DIAGONAL portion. The example below better 4421 illustrates this concept. 4422 4423 For a square global matrix we define each processor's diagonal portion 4424 to be its local rows and the corresponding columns (a square submatrix); 4425 each processor's off-diagonal portion encompasses the remainder of the 4426 local matrix (a rectangular submatrix). 4427 4428 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4429 4430 When calling this routine with a single process communicator, a matrix of 4431 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4432 type of communicator, use the construction mechanism 4433 .vb 4434 MatCreate(...,&A); 4435 MatSetType(A,MATMPIAIJ); 4436 MatSetSizes(A, m,n,M,N); 4437 MatMPIAIJSetPreallocation(A,...); 4438 .ve 4439 4440 By default, this format uses inodes (identical nodes) when possible. 4441 We search for consecutive rows with the same nonzero structure, thereby 4442 reusing matrix information to achieve increased efficiency. 4443 4444 Usage: 4445 Consider the following 8x8 matrix with 34 non-zero values, that is 4446 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4447 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4448 as follows 4449 4450 .vb 4451 1 2 0 | 0 3 0 | 0 4 4452 Proc0 0 5 6 | 7 0 0 | 8 0 4453 9 0 10 | 11 0 0 | 12 0 4454 ------------------------------------- 4455 13 0 14 | 15 16 17 | 0 0 4456 Proc1 0 18 0 | 19 20 21 | 0 0 4457 0 0 0 | 22 23 0 | 24 0 4458 ------------------------------------- 4459 Proc2 25 26 27 | 0 0 28 | 29 0 4460 30 0 0 | 31 32 33 | 0 34 4461 .ve 4462 4463 This can be represented as a collection of submatrices as 4464 4465 .vb 4466 A B C 4467 D E F 4468 G H I 4469 .ve 4470 4471 Where the submatrices A,B,C are owned by proc0, D,E,F are 4472 owned by proc1, G,H,I are owned by proc2. 4473 4474 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4475 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4476 The 'M','N' parameters are 8,8, and have the same values on all procs. 4477 4478 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4479 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4480 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4481 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4482 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4483 matrix, and [DF] as another `MATSEQAIJ` matrix. 4484 4485 When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4486 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4487 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4488 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4489 the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 4490 In this case, the values of `d_nz`,`o_nz` are 4491 .vb 4492 proc0 d_nz = 2, o_nz = 2 4493 proc1 d_nz = 3, o_nz = 2 4494 proc2 d_nz = 1, o_nz = 4 4495 .ve 4496 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4497 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4498 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4499 34 values. 4500 4501 When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4502 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4503 In the above case the values for `d_nnz`,`o_nnz` are 4504 .vb 4505 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4506 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4507 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4508 .ve 4509 Here the space allocated is the sum of all the above values, i.e. 34, and 4510 hence the preallocation is perfect.
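   As a minimal usage sketch only (each rank passes its own counts; the numbers below are the proc0
   values from the example above and are not part of the interface), the matrix could be created with
   per-row preallocation as
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2, 2, 2}, o_nnz[3] = {2, 2, 2}; /* proc0 counts from the example */

      MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
      /* ... MatSetValues() on the locally owned rows, then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve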
4511 4512 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4513 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4514 @*/ 4515 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4516 { 4517 PetscMPIInt size; 4518 4519 PetscFunctionBegin; 4520 PetscCall(MatCreate(comm, A)); 4521 PetscCall(MatSetSizes(*A, m, n, M, N)); 4522 PetscCallMPI(MPI_Comm_size(comm, &size)); 4523 if (size > 1) { 4524 PetscCall(MatSetType(*A, MATMPIAIJ)); 4525 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4526 } else { 4527 PetscCall(MatSetType(*A, MATSEQAIJ)); 4528 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4529 } 4530 PetscFunctionReturn(PETSC_SUCCESS); 4531 } 4532 4533 /*MC 4534 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4535 4536 Synopsis: 4537 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4538 4539 Not Collective 4540 4541 Input Parameter: 4542 . A - the `MATMPIAIJ` matrix 4543 4544 Output Parameters: 4545 + Ad - the diagonal portion of the matrix 4546 . Ao - the off diagonal portion of the matrix 4547 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4548 - ierr - error code 4549 4550 Level: advanced 4551 4552 Note: 4553 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4554 4555 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4556 M*/ 4557 4558 /*MC 4559 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4560 4561 Synopsis: 4562 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4563 4564 Not Collective 4565 4566 Input Parameters: 4567 + A - the `MATMPIAIJ` matrix 4568 . Ad - the diagonal portion of the matrix 4569 . Ao - the off diagonal portion of the matrix 4570 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4571 - ierr - error code 4572 4573 Level: advanced 4574 4575 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4576 M*/ 4577 4578 /*@C 4579 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4580 4581 Not Collective 4582 4583 Input Parameter: 4584 . A - The `MATMPIAIJ` matrix 4585 4586 Output Parameters: 4587 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4588 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4589 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4590 4591 Level: intermediate 4592 4593 Note: 4594 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4595 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4596 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4597 local column numbers to global column numbers in the original matrix. 
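   A minimal usage sketch (illustration only; `A` is assumed to be an already assembled `MATMPIAIJ` matrix),
   translating the off-diagonal column indices of the first local row back to global columns of `A`:
.vb
      Mat             Ad, Ao;
      const PetscInt *colmap, *cols;
      PetscInt        ncols, j;

      MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
      MatGetRow(Ao, 0, &ncols, &cols, NULL);
      for (j = 0; j < ncols; j++) {
        /* colmap[cols[j]] is the global column in A of the j-th entry in this row of Ao */
      }
      MatRestoreRow(Ao, 0, &ncols, &cols, NULL);
.ve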
4598 4599 Fortran Note: 4600 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4601 4602 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4603 @*/ 4604 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4605 { 4606 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4607 PetscBool flg; 4608 4609 PetscFunctionBegin; 4610 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4611 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4612 if (Ad) *Ad = a->A; 4613 if (Ao) *Ao = a->B; 4614 if (colmap) *colmap = a->garray; 4615 PetscFunctionReturn(PETSC_SUCCESS); 4616 } 4617 4618 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4619 { 4620 PetscInt m, N, i, rstart, nnz, Ii; 4621 PetscInt *indx; 4622 PetscScalar *values; 4623 MatType rootType; 4624 4625 PetscFunctionBegin; 4626 PetscCall(MatGetSize(inmat, &m, &N)); 4627 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4628 PetscInt *dnz, *onz, sum, bs, cbs; 4629 4630 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4631 /* Check sum(n) = N */ 4632 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4633 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4634 4635 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4636 rstart -= m; 4637 4638 MatPreallocateBegin(comm, m, n, dnz, onz); 4639 for (i = 0; i < m; i++) { 4640 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4641 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4642 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4643 } 4644 4645 PetscCall(MatCreate(comm, outmat)); 4646 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4647 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4648 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4649 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4650 PetscCall(MatSetType(*outmat, rootType)); 4651 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4652 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4653 MatPreallocateEnd(dnz, onz); 4654 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4655 } 4656 4657 /* numeric phase */ 4658 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4659 for (i = 0; i < m; i++) { 4660 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4661 Ii = i + rstart; 4662 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4663 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4664 } 4665 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4666 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4667 PetscFunctionReturn(PETSC_SUCCESS); 4668 } 4669 4670 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4671 { 4672 PetscMPIInt rank; 4673 PetscInt m, N, i, rstart, nnz; 4674 size_t len; 4675 const PetscInt *indx; 4676 PetscViewer out; 4677 char *name; 4678 Mat B; 4679 const PetscScalar *values; 4680 4681 PetscFunctionBegin; 4682 PetscCall(MatGetLocalSize(A, &m, NULL)); 4683 PetscCall(MatGetSize(A, NULL, &N)); 4684 /* Should this be the type of 
the diagonal block of A? */ 4685 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4686 PetscCall(MatSetSizes(B, m, N, m, N)); 4687 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4688 PetscCall(MatSetType(B, MATSEQAIJ)); 4689 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4690 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4691 for (i = 0; i < m; i++) { 4692 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4693 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4694 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4695 } 4696 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4697 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4698 4699 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4700 PetscCall(PetscStrlen(outfile, &len)); 4701 PetscCall(PetscMalloc1(len + 6, &name)); 4702 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4703 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4704 PetscCall(PetscFree(name)); 4705 PetscCall(MatView(B, out)); 4706 PetscCall(PetscViewerDestroy(&out)); 4707 PetscCall(MatDestroy(&B)); 4708 PetscFunctionReturn(PETSC_SUCCESS); 4709 } 4710 4711 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4712 { 4713 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4714 4715 PetscFunctionBegin; 4716 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4717 PetscCall(PetscFree(merge->id_r)); 4718 PetscCall(PetscFree(merge->len_s)); 4719 PetscCall(PetscFree(merge->len_r)); 4720 PetscCall(PetscFree(merge->bi)); 4721 PetscCall(PetscFree(merge->bj)); 4722 PetscCall(PetscFree(merge->buf_ri[0])); 4723 PetscCall(PetscFree(merge->buf_ri)); 4724 PetscCall(PetscFree(merge->buf_rj[0])); 4725 PetscCall(PetscFree(merge->buf_rj)); 4726 PetscCall(PetscFree(merge->coi)); 4727 PetscCall(PetscFree(merge->coj)); 4728 PetscCall(PetscFree(merge->owners_co)); 4729 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4730 PetscCall(PetscFree(merge)); 4731 PetscFunctionReturn(PETSC_SUCCESS); 4732 } 4733 4734 #include <../src/mat/utils/freespace.h> 4735 #include <petscbt.h> 4736 4737 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4738 { 4739 MPI_Comm comm; 4740 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4741 PetscMPIInt size, rank, taga, *len_s; 4742 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4743 PetscInt proc, m; 4744 PetscInt **buf_ri, **buf_rj; 4745 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4746 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4747 MPI_Request *s_waits, *r_waits; 4748 MPI_Status *status; 4749 const MatScalar *aa, *a_a; 4750 MatScalar **abuf_r, *ba_i; 4751 Mat_Merge_SeqsToMPI *merge; 4752 PetscContainer container; 4753 4754 PetscFunctionBegin; 4755 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4756 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4757 4758 PetscCallMPI(MPI_Comm_size(comm, &size)); 4759 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4760 4761 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4762 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4763 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4764 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4765 aa = a_a; 4766 4767 bi = merge->bi; 4768 bj = merge->bj; 4769 buf_ri = merge->buf_ri; 4770 buf_rj = merge->buf_rj; 4771 4772 PetscCall(PetscMalloc1(size, &status)); 
4773 owners = merge->rowmap->range; 4774 len_s = merge->len_s; 4775 4776 /* send and recv matrix values */ 4777 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4778 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4779 4780 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4781 for (proc = 0, k = 0; proc < size; proc++) { 4782 if (!len_s[proc]) continue; 4783 i = owners[proc]; 4784 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4785 k++; 4786 } 4787 4788 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4789 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4790 PetscCall(PetscFree(status)); 4791 4792 PetscCall(PetscFree(s_waits)); 4793 PetscCall(PetscFree(r_waits)); 4794 4795 /* insert mat values of mpimat */ 4796 PetscCall(PetscMalloc1(N, &ba_i)); 4797 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4798 4799 for (k = 0; k < merge->nrecv; k++) { 4800 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4801 nrows = *(buf_ri_k[k]); 4802 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4803 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4804 } 4805 4806 /* set values of ba */ 4807 m = merge->rowmap->n; 4808 for (i = 0; i < m; i++) { 4809 arow = owners[rank] + i; 4810 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4811 bnzi = bi[i + 1] - bi[i]; 4812 PetscCall(PetscArrayzero(ba_i, bnzi)); 4813 4814 /* add local non-zero vals of this proc's seqmat into ba */ 4815 anzi = ai[arow + 1] - ai[arow]; 4816 aj = a->j + ai[arow]; 4817 aa = a_a + ai[arow]; 4818 nextaj = 0; 4819 for (j = 0; nextaj < anzi; j++) { 4820 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4821 ba_i[j] += aa[nextaj++]; 4822 } 4823 } 4824 4825 /* add received vals into ba */ 4826 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4827 /* i-th row */ 4828 if (i == *nextrow[k]) { 4829 anzi = *(nextai[k] + 1) - *nextai[k]; 4830 aj = buf_rj[k] + *(nextai[k]); 4831 aa = abuf_r[k] + *(nextai[k]); 4832 nextaj = 0; 4833 for (j = 0; nextaj < anzi; j++) { 4834 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4835 ba_i[j] += aa[nextaj++]; 4836 } 4837 } 4838 nextrow[k]++; 4839 nextai[k]++; 4840 } 4841 } 4842 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4843 } 4844 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4845 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4846 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4847 4848 PetscCall(PetscFree(abuf_r[0])); 4849 PetscCall(PetscFree(abuf_r)); 4850 PetscCall(PetscFree(ba_i)); 4851 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4852 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4853 PetscFunctionReturn(PETSC_SUCCESS); 4854 } 4855 4856 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4857 { 4858 Mat B_mpi; 4859 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4860 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4861 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4862 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4863 PetscInt len, proc, *dnz, *onz, bs, cbs; 4864 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4865 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, 
**nextrow, **nextai; 4866 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4867 MPI_Status *status; 4868 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4869 PetscBT lnkbt; 4870 Mat_Merge_SeqsToMPI *merge; 4871 PetscContainer container; 4872 4873 PetscFunctionBegin; 4874 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4875 4876 /* make sure it is a PETSc comm */ 4877 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4878 PetscCallMPI(MPI_Comm_size(comm, &size)); 4879 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4880 4881 PetscCall(PetscNew(&merge)); 4882 PetscCall(PetscMalloc1(size, &status)); 4883 4884 /* determine row ownership */ 4885 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4886 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4887 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4888 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4889 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4890 PetscCall(PetscMalloc1(size, &len_si)); 4891 PetscCall(PetscMalloc1(size, &merge->len_s)); 4892 4893 m = merge->rowmap->n; 4894 owners = merge->rowmap->range; 4895 4896 /* determine the number of messages to send, their lengths */ 4897 len_s = merge->len_s; 4898 4899 len = 0; /* length of buf_si[] */ 4900 merge->nsend = 0; 4901 for (proc = 0; proc < size; proc++) { 4902 len_si[proc] = 0; 4903 if (proc == rank) { 4904 len_s[proc] = 0; 4905 } else { 4906 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4907 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4908 } 4909 if (len_s[proc]) { 4910 merge->nsend++; 4911 nrows = 0; 4912 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4913 if (ai[i + 1] > ai[i]) nrows++; 4914 } 4915 len_si[proc] = 2 * (nrows + 1); 4916 len += len_si[proc]; 4917 } 4918 } 4919 4920 /* determine the number and length of messages to receive for ij-structure */ 4921 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4922 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4923 4924 /* post the Irecv of j-structure */ 4925 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4926 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4927 4928 /* post the Isend of j-structure */ 4929 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4930 4931 for (proc = 0, k = 0; proc < size; proc++) { 4932 if (!len_s[proc]) continue; 4933 i = owners[proc]; 4934 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4935 k++; 4936 } 4937 4938 /* receives and sends of j-structure are complete */ 4939 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4940 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4941 4942 /* send and recv i-structure */ 4943 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4944 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4945 4946 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4947 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4948 for (proc = 0, k = 0; proc < size; proc++) { 4949 if (!len_s[proc]) continue; 4950 /* form outgoing message for i-structure: 4951 buf_si[0]: nrows to be sent 4952 [1:nrows]: row index (global) 4953 [nrows+1:2*nrows+1]: i-structure index 4954 */ 4955 nrows = len_si[proc] / 2 - 1; 4956 buf_si_i = buf_si + nrows + 1; 4957 buf_si[0] = nrows; 
4958 buf_si_i[0] = 0; 4959 nrows = 0; 4960 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4961 anzi = ai[i + 1] - ai[i]; 4962 if (anzi) { 4963 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4964 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4965 nrows++; 4966 } 4967 } 4968 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4969 k++; 4970 buf_si += len_si[proc]; 4971 } 4972 4973 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4974 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4975 4976 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4977 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4978 4979 PetscCall(PetscFree(len_si)); 4980 PetscCall(PetscFree(len_ri)); 4981 PetscCall(PetscFree(rj_waits)); 4982 PetscCall(PetscFree2(si_waits, sj_waits)); 4983 PetscCall(PetscFree(ri_waits)); 4984 PetscCall(PetscFree(buf_s)); 4985 PetscCall(PetscFree(status)); 4986 4987 /* compute a local seq matrix in each processor */ 4988 /* allocate bi array and free space for accumulating nonzero column info */ 4989 PetscCall(PetscMalloc1(m + 1, &bi)); 4990 bi[0] = 0; 4991 4992 /* create and initialize a linked list */ 4993 nlnk = N + 1; 4994 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4995 4996 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4997 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4998 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4999 5000 current_space = free_space; 5001 5002 /* determine symbolic info for each local row */ 5003 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5004 5005 for (k = 0; k < merge->nrecv; k++) { 5006 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5007 nrows = *buf_ri_k[k]; 5008 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5009 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5010 } 5011 5012 MatPreallocateBegin(comm, m, n, dnz, onz); 5013 len = 0; 5014 for (i = 0; i < m; i++) { 5015 bnzi = 0; 5016 /* add local non-zero cols of this proc's seqmat into lnk */ 5017 arow = owners[rank] + i; 5018 anzi = ai[arow + 1] - ai[arow]; 5019 aj = a->j + ai[arow]; 5020 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5021 bnzi += nlnk; 5022 /* add received col data into lnk */ 5023 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5024 if (i == *nextrow[k]) { /* i-th row */ 5025 anzi = *(nextai[k] + 1) - *nextai[k]; 5026 aj = buf_rj[k] + *nextai[k]; 5027 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5028 bnzi += nlnk; 5029 nextrow[k]++; 5030 nextai[k]++; 5031 } 5032 } 5033 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5034 5035 /* if free space is not available, make more free space */ 5036 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5037 /* copy data into free space, then initialize lnk */ 5038 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5039 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5040 5041 current_space->array += bnzi; 5042 current_space->local_used += bnzi; 5043 current_space->local_remaining -= bnzi; 5044 
5045 bi[i + 1] = bi[i] + bnzi; 5046 } 5047 5048 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5049 5050 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5051 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5052 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5053 5054 /* create symbolic parallel matrix B_mpi */ 5055 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5056 PetscCall(MatCreate(comm, &B_mpi)); 5057 if (n == PETSC_DECIDE) { 5058 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5059 } else { 5060 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5061 } 5062 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5063 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5064 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5065 MatPreallocateEnd(dnz, onz); 5066 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5067 5068 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5069 B_mpi->assembled = PETSC_FALSE; 5070 merge->bi = bi; 5071 merge->bj = bj; 5072 merge->buf_ri = buf_ri; 5073 merge->buf_rj = buf_rj; 5074 merge->coi = NULL; 5075 merge->coj = NULL; 5076 merge->owners_co = NULL; 5077 5078 PetscCall(PetscCommDestroy(&comm)); 5079 5080 /* attach the supporting struct to B_mpi for reuse */ 5081 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5082 PetscCall(PetscContainerSetPointer(container, merge)); 5083 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5084 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5085 PetscCall(PetscContainerDestroy(&container)); 5086 *mpimat = B_mpi; 5087 5088 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5089 PetscFunctionReturn(PETSC_SUCCESS); 5090 } 5091 5092 /*@C 5093 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5094 matrices from each processor 5095 5096 Collective 5097 5098 Input Parameters: 5099 + comm - the communicators the parallel matrix will live on 5100 . seqmat - the input sequential matrices 5101 . m - number of local rows (or `PETSC_DECIDE`) 5102 . n - number of local columns (or `PETSC_DECIDE`) 5103 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5104 5105 Output Parameter: 5106 . mpimat - the parallel matrix generated 5107 5108 Level: advanced 5109 5110 Note: 5111 The dimensions of the sequential matrix in each processor MUST be the same. 5112 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5113 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
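   A minimal call sketch (illustration only; `seqmat` is assumed to be an assembled `MATSEQAIJ` matrix
   with the same dimensions on every rank):
.vb
      Mat C;

      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &C);
      /* ... later, after only the numerical values of seqmat have changed ... */
      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &C);
.ve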
5114 5115 .seealso: [](chapter_matrices), `Mat`, `MatCreateAIJ()` 5116 @*/ 5117 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5118 { 5119 PetscMPIInt size; 5120 5121 PetscFunctionBegin; 5122 PetscCallMPI(MPI_Comm_size(comm, &size)); 5123 if (size == 1) { 5124 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5125 if (scall == MAT_INITIAL_MATRIX) { 5126 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5127 } else { 5128 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5129 } 5130 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5131 PetscFunctionReturn(PETSC_SUCCESS); 5132 } 5133 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5134 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5135 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5136 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5137 PetscFunctionReturn(PETSC_SUCCESS); 5138 } 5139 5140 /*@ 5141 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking its local rows and putting them into a sequential matrix with 5142 mlocal rows and n columns, where mlocal is obtained with `MatGetLocalSize()` and n is the global column count obtained 5143 with `MatGetSize()` 5144 5145 Not Collective 5146 5147 Input Parameter: 5148 . A - the matrix 5149 5150 Output Parameter: 5151 . A_loc - the local sequential matrix generated 5152 5153 Level: developer 5154 5155 Notes: 5156 In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix. 5157 5158 Destroy the matrix with `MatDestroy()` 5159 5160 .seealso: [](chapter_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5161 @*/ 5162 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5163 { 5164 PetscBool mpi; 5165 5166 PetscFunctionBegin; 5167 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5168 if (mpi) { 5169 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5170 } else { 5171 *A_loc = A; 5172 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5173 } 5174 PetscFunctionReturn(PETSC_SUCCESS); 5175 } 5176 5177 /*@ 5178 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5179 mlocal rows and n columns, where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5180 with `MatGetSize()` 5181 5182 Not Collective 5183 5184 Input Parameters: 5185 + A - the matrix 5186 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5187 5188 Output Parameter: 5189 . A_loc - the local sequential matrix generated 5190 5191 Level: developer 5192 5193 Notes: 5194 In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix. 5195 5196 When the communicator associated with `A` has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A`. 5197 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called. 5198 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5199 modify the values of the returned `A_loc`.
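   A minimal usage sketch (illustration only; `A` is assumed to be an assembled `MATMPIAIJ` matrix):
.vb
      Mat A_loc;

      MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
      /* ... use A_loc; after the values of A change (same nonzero pattern), refresh the copy ... */
      MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);
      MatDestroy(&A_loc);
.ve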
5200 5201 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5202 @*/ 5203 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5204 { 5205 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5206 Mat_SeqAIJ *mat, *a, *b; 5207 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5208 const PetscScalar *aa, *ba, *aav, *bav; 5209 PetscScalar *ca, *cam; 5210 PetscMPIInt size; 5211 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5212 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5213 PetscBool match; 5214 5215 PetscFunctionBegin; 5216 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5217 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5218 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5219 if (size == 1) { 5220 if (scall == MAT_INITIAL_MATRIX) { 5221 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5222 *A_loc = mpimat->A; 5223 } else if (scall == MAT_REUSE_MATRIX) { 5224 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5225 } 5226 PetscFunctionReturn(PETSC_SUCCESS); 5227 } 5228 5229 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5230 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5231 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5232 ai = a->i; 5233 aj = a->j; 5234 bi = b->i; 5235 bj = b->j; 5236 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5237 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5238 aa = aav; 5239 ba = bav; 5240 if (scall == MAT_INITIAL_MATRIX) { 5241 PetscCall(PetscMalloc1(1 + am, &ci)); 5242 ci[0] = 0; 5243 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5244 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5245 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5246 k = 0; 5247 for (i = 0; i < am; i++) { 5248 ncols_o = bi[i + 1] - bi[i]; 5249 ncols_d = ai[i + 1] - ai[i]; 5250 /* off-diagonal portion of A */ 5251 for (jo = 0; jo < ncols_o; jo++) { 5252 col = cmap[*bj]; 5253 if (col >= cstart) break; 5254 cj[k] = col; 5255 bj++; 5256 ca[k++] = *ba++; 5257 } 5258 /* diagonal portion of A */ 5259 for (j = 0; j < ncols_d; j++) { 5260 cj[k] = cstart + *aj++; 5261 ca[k++] = *aa++; 5262 } 5263 /* off-diagonal portion of A */ 5264 for (j = jo; j < ncols_o; j++) { 5265 cj[k] = cmap[*bj++]; 5266 ca[k++] = *ba++; 5267 } 5268 } 5269 /* put together the new matrix */ 5270 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5271 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5272 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5273 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5274 mat->free_a = PETSC_TRUE; 5275 mat->free_ij = PETSC_TRUE; 5276 mat->nonew = 0; 5277 } else if (scall == MAT_REUSE_MATRIX) { 5278 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5279 ci = mat->i; 5280 cj = mat->j; 5281 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5282 for (i = 0; i < am; i++) { 5283 /* off-diagonal portion of A */ 5284 ncols_o = bi[i + 1] - bi[i]; 5285 for (jo = 0; jo < ncols_o; jo++) { 5286 col = cmap[*bj]; 5287 if (col >= cstart) break; 5288 *cam++ = *ba++; 5289 bj++; 5290 } 5291 /* diagonal portion of A */ 5292 ncols_d = ai[i + 1] - ai[i]; 5293 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5294 /* off-diagonal portion of A */ 5295 for (j = jo; j < ncols_o; j++) { 5296 *cam++ = *ba++; 5297 bj++; 5298 } 5299 } 5300 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5301 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5302 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5303 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5304 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5305 PetscFunctionReturn(PETSC_SUCCESS); 5306 } 5307 5308 /*@ 5309 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5310 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5311 5312 Not Collective 5313 5314 Input Parameters: 5315 + A - the matrix 5316 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5317 5318 Output Parameters: 5319 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5320 - A_loc - the local sequential matrix generated 5321 5322 Level: developer 5323 5324 Note: 5325 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5326 part, then those associated with the off diagonal part (in its local ordering) 5327 5328 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5329 @*/ 5330 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5331 { 5332 Mat Ao, Ad; 5333 const PetscInt *cmap; 5334 PetscMPIInt size; 5335 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5336 5337 PetscFunctionBegin; 5338 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5339 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5340 if (size == 1) { 5341 if (scall == MAT_INITIAL_MATRIX) { 5342 PetscCall(PetscObjectReference((PetscObject)Ad)); 5343 *A_loc = Ad; 5344 } else if (scall == MAT_REUSE_MATRIX) { 5345 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5346 } 5347 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5348 PetscFunctionReturn(PETSC_SUCCESS); 5349 } 5350 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5351 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5352 if (f) { 5353 PetscCall((*f)(A, scall, glob, A_loc)); 5354 } else { 5355 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5356 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5357 Mat_SeqAIJ *c; 5358 PetscInt *ai = a->i, *aj = a->j; 5359 PetscInt *bi = b->i, *bj = b->j; 5360 PetscInt *ci, *cj; 5361 const PetscScalar *aa, *ba; 5362 PetscScalar *ca; 5363 PetscInt i, j, am, dn, on; 5364 5365 
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5366 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5367 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5368 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5369 if (scall == MAT_INITIAL_MATRIX) { 5370 PetscInt k; 5371 PetscCall(PetscMalloc1(1 + am, &ci)); 5372 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5373 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5374 ci[0] = 0; 5375 for (i = 0, k = 0; i < am; i++) { 5376 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5377 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5378 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5379 /* diagonal portion of A */ 5380 for (j = 0; j < ncols_d; j++, k++) { 5381 cj[k] = *aj++; 5382 ca[k] = *aa++; 5383 } 5384 /* off-diagonal portion of A */ 5385 for (j = 0; j < ncols_o; j++, k++) { 5386 cj[k] = dn + *bj++; 5387 ca[k] = *ba++; 5388 } 5389 } 5390 /* put together the new matrix */ 5391 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5392 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5393 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5394 c = (Mat_SeqAIJ *)(*A_loc)->data; 5395 c->free_a = PETSC_TRUE; 5396 c->free_ij = PETSC_TRUE; 5397 c->nonew = 0; 5398 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5399 } else if (scall == MAT_REUSE_MATRIX) { 5400 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5401 for (i = 0; i < am; i++) { 5402 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5403 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5404 /* diagonal portion of A */ 5405 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5406 /* off-diagonal portion of A */ 5407 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5408 } 5409 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5410 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5411 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5412 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5413 if (glob) { 5414 PetscInt cst, *gidx; 5415 5416 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5417 PetscCall(PetscMalloc1(dn + on, &gidx)); 5418 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5419 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5420 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5421 } 5422 } 5423 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5424 PetscFunctionReturn(PETSC_SUCCESS); 5425 } 5426 5427 /*@C 5428 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5429 5430 Not Collective 5431 5432 Input Parameters: 5433 + A - the matrix 5434 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5435 . row - index set of rows to extract (or `NULL`) 5436 - col - index set of columns to extract (or `NULL`) 5437 5438 Output Parameter: 5439 . 
A_loc - the local sequential matrix generated 5440 5441 Level: developer 5442 5443 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5444 @*/ 5445 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5446 { 5447 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5448 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5449 IS isrowa, iscola; 5450 Mat *aloc; 5451 PetscBool match; 5452 5453 PetscFunctionBegin; 5454 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5455 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5456 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5457 if (!row) { 5458 start = A->rmap->rstart; 5459 end = A->rmap->rend; 5460 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5461 } else { 5462 isrowa = *row; 5463 } 5464 if (!col) { 5465 start = A->cmap->rstart; 5466 cmap = a->garray; 5467 nzA = a->A->cmap->n; 5468 nzB = a->B->cmap->n; 5469 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5470 ncols = 0; 5471 for (i = 0; i < nzB; i++) { 5472 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5473 else break; 5474 } 5475 imark = i; 5476 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5477 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5478 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5479 } else { 5480 iscola = *col; 5481 } 5482 if (scall != MAT_INITIAL_MATRIX) { 5483 PetscCall(PetscMalloc1(1, &aloc)); 5484 aloc[0] = *A_loc; 5485 } 5486 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5487 if (!col) { /* attach global id of condensed columns */ 5488 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5489 } 5490 *A_loc = aloc[0]; 5491 PetscCall(PetscFree(aloc)); 5492 if (!row) PetscCall(ISDestroy(&isrowa)); 5493 if (!col) PetscCall(ISDestroy(&iscola)); 5494 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5495 PetscFunctionReturn(PETSC_SUCCESS); 5496 } 5497 5498 /* 5499 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5500 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5501 * on a global size. 
5502 * */ 5503 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5504 { 5505 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5506 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth; 5507 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5508 PetscMPIInt owner; 5509 PetscSFNode *iremote, *oiremote; 5510 const PetscInt *lrowindices; 5511 PetscSF sf, osf; 5512 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5513 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5514 MPI_Comm comm; 5515 ISLocalToGlobalMapping mapping; 5516 const PetscScalar *pd_a, *po_a; 5517 5518 PetscFunctionBegin; 5519 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5520 /* plocalsize is the number of roots 5521 * nrows is the number of leaves 5522 * */ 5523 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5524 PetscCall(ISGetLocalSize(rows, &nrows)); 5525 PetscCall(PetscCalloc1(nrows, &iremote)); 5526 PetscCall(ISGetIndices(rows, &lrowindices)); 5527 for (i = 0; i < nrows; i++) { 5528 /* Find a remote index and an owner for a row 5529 * The row could be local or remote 5530 * */ 5531 owner = 0; 5532 lidx = 0; 5533 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5534 iremote[i].index = lidx; 5535 iremote[i].rank = owner; 5536 } 5537 /* Create SF to communicate how many nonzero columns for each row */ 5538 PetscCall(PetscSFCreate(comm, &sf)); 5539 /* SF will figure out the number of nonzero colunms for each row, and their 5540 * offsets 5541 * */ 5542 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5543 PetscCall(PetscSFSetFromOptions(sf)); 5544 PetscCall(PetscSFSetUp(sf)); 5545 5546 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5547 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5548 PetscCall(PetscCalloc1(nrows, &pnnz)); 5549 roffsets[0] = 0; 5550 roffsets[1] = 0; 5551 for (i = 0; i < plocalsize; i++) { 5552 /* diag */ 5553 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5554 /* off diag */ 5555 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5556 /* compute offsets so that we relative location for each row */ 5557 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5558 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5559 } 5560 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5561 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5562 /* 'r' means root, and 'l' means leaf */ 5563 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5564 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5565 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5566 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5567 PetscCall(PetscSFDestroy(&sf)); 5568 PetscCall(PetscFree(roffsets)); 5569 PetscCall(PetscFree(nrcols)); 5570 dntotalcols = 0; 5571 ontotalcols = 0; 5572 ncol = 0; 5573 for (i = 0; i < nrows; i++) { 5574 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5575 ncol = PetscMax(pnnz[i], ncol); 5576 /* diag */ 5577 dntotalcols += nlcols[i * 2 + 0]; 5578 /* off diag */ 5579 ontotalcols += nlcols[i * 2 + 1]; 5580 } 5581 /* We do not need to figure the right number of columns 5582 * since all the calculations will be done by going through the raw data 5583 * */ 5584 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5585 PetscCall(MatSetUp(*P_oth)); 5586 PetscCall(PetscFree(pnnz)); 5587 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5588 /* diag */ 5589 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5590 /* off diag */ 5591 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5592 /* diag */ 5593 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5594 /* off diag */ 5595 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5596 dntotalcols = 0; 5597 ontotalcols = 0; 5598 ntotalcols = 0; 5599 for (i = 0; i < nrows; i++) { 5600 owner = 0; 5601 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5602 /* Set iremote for diag matrix */ 5603 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5604 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5605 iremote[dntotalcols].rank = owner; 5606 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5607 ilocal[dntotalcols++] = ntotalcols++; 5608 } 5609 /* off diag */ 5610 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5611 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5612 oiremote[ontotalcols].rank = owner; 5613 oilocal[ontotalcols++] = ntotalcols++; 5614 } 5615 } 5616 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5617 PetscCall(PetscFree(loffsets)); 5618 PetscCall(PetscFree(nlcols)); 5619 PetscCall(PetscSFCreate(comm, &sf)); 5620 /* P serves as roots and P_oth is leaves 5621 * Diag matrix 5622 * */ 5623 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5624 PetscCall(PetscSFSetFromOptions(sf)); 5625 PetscCall(PetscSFSetUp(sf)); 5626 5627 PetscCall(PetscSFCreate(comm, &osf)); 5628 /* Off diag */ 5629 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5630 PetscCall(PetscSFSetFromOptions(osf)); 5631 PetscCall(PetscSFSetUp(osf)); 5632 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5633 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5634 /* We operate on the matrix internal data for saving memory */ 5635 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5636 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5637 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5638 /* Convert to global indices for diag matrix */ 5639 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5640 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5641 /* We want P_oth store global indices */ 5642 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5643 /* Use memory scalable approach */ 5644 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5645 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5646 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5647 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5648 /* Convert back to local indices */ 5649 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5650 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5651 nout = 0; 5652 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5653 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5654 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5655 /* Exchange values */ 5656 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5657 
PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5658 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5659 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5660 /* Stop PETSc from shrinking memory */ 5661 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5662 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5663 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5664 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5665 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5666 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5667 PetscCall(PetscSFDestroy(&sf)); 5668 PetscCall(PetscSFDestroy(&osf)); 5669 PetscFunctionReturn(PETSC_SUCCESS); 5670 } 5671 5672 /* 5673 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5674 * This supports MPIAIJ and MAIJ 5675 * */ 5676 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5677 { 5678 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5679 Mat_SeqAIJ *p_oth; 5680 IS rows, map; 5681 PetscHMapI hamp; 5682 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5683 MPI_Comm comm; 5684 PetscSF sf, osf; 5685 PetscBool has; 5686 5687 PetscFunctionBegin; 5688 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5689 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5690 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5691 * and then create a submatrix (that often is an overlapping matrix) 5692 * */ 5693 if (reuse == MAT_INITIAL_MATRIX) { 5694 /* Use a hash table to figure out unique keys */ 5695 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5696 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5697 count = 0; 5698 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5699 for (i = 0; i < a->B->cmap->n; i++) { 5700 key = a->garray[i] / dof; 5701 PetscCall(PetscHMapIHas(hamp, key, &has)); 5702 if (!has) { 5703 mapping[i] = count; 5704 PetscCall(PetscHMapISet(hamp, key, count++)); 5705 } else { 5706 /* Current 'i' has the same value the previous step */ 5707 mapping[i] = count - 1; 5708 } 5709 } 5710 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5711 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5712 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5713 PetscCall(PetscCalloc1(htsize, &rowindices)); 5714 off = 0; 5715 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5716 PetscCall(PetscHMapIDestroy(&hamp)); 5717 PetscCall(PetscSortInt(htsize, rowindices)); 5718 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5719 /* In case, the matrix was already created but users want to recreate the matrix */ 5720 PetscCall(MatDestroy(P_oth)); 5721 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5722 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5723 PetscCall(ISDestroy(&map)); 5724 PetscCall(ISDestroy(&rows)); 5725 } else if (reuse == MAT_REUSE_MATRIX) { 5726 /* If matrix was already created, we simply update values using SF objects 5727 * that as attached to the matrix earlier. 
5728 */ 5729 const PetscScalar *pd_a, *po_a; 5730 5731 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5732 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5733 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5734 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5735 /* Update values in place */ 5736 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5737 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5738 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5739 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5740 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5741 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5742 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5743 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5744 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5745 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5746 PetscFunctionReturn(PETSC_SUCCESS); 5747 } 5748 5749 /*@C 5750 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5751 5752 Collective 5753 5754 Input Parameters: 5755 + A - the first matrix in `MATMPIAIJ` format 5756 . B - the second matrix in `MATMPIAIJ` format 5757 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5758 5759 Output Parameters: 5760 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5761 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5762 - B_seq - the sequential matrix generated 5763 5764 Level: developer 5765 5766 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5767 @*/ 5768 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5769 { 5770 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5771 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5772 IS isrowb, iscolb; 5773 Mat *bseq = NULL; 5774 5775 PetscFunctionBegin; 5776 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5777 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5778 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5779 5780 if (scall == MAT_INITIAL_MATRIX) { 5781 start = A->cmap->rstart; 5782 cmap = a->garray; 5783 nzA = a->A->cmap->n; 5784 nzB = a->B->cmap->n; 5785 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5786 ncols = 0; 5787 for (i = 0; i < nzB; i++) { /* row < local row index */ 5788 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5789 else break; 5790 } 5791 imark = i; 5792 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5793 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5794 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5795 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5796 } else { 5797 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5798 isrowb = *rowb; 5799 iscolb = *colb; 5800 PetscCall(PetscMalloc1(1, &bseq)); 5801 bseq[0] = *B_seq; 5802 } 5803 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5804 *B_seq = bseq[0]; 5805 PetscCall(PetscFree(bseq)); 
5806 if (!rowb) { 5807 PetscCall(ISDestroy(&isrowb)); 5808 } else { 5809 *rowb = isrowb; 5810 } 5811 if (!colb) { 5812 PetscCall(ISDestroy(&iscolb)); 5813 } else { 5814 *colb = iscolb; 5815 } 5816 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5817 PetscFunctionReturn(PETSC_SUCCESS); 5818 } 5819 5820 /* 5821 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5822 of the OFF-DIAGONAL portion of local A 5823 5824 Collective 5825 5826 Input Parameters: 5827 + A,B - the matrices in `MATMPIAIJ` format 5828 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5829 5830 Output Parameter: 5831 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5832 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5833 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5834 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5835 5836 Developer Note: 5837 This directly accesses information inside the VecScatter associated with the matrix-vector product 5838 for this matrix. This is not desirable.. 5839 5840 Level: developer 5841 5842 */ 5843 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5844 { 5845 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5846 Mat_SeqAIJ *b_oth; 5847 VecScatter ctx; 5848 MPI_Comm comm; 5849 const PetscMPIInt *rprocs, *sprocs; 5850 const PetscInt *srow, *rstarts, *sstarts; 5851 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5852 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5853 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5854 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5855 PetscMPIInt size, tag, rank, nreqs; 5856 5857 PetscFunctionBegin; 5858 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5859 PetscCallMPI(MPI_Comm_size(comm, &size)); 5860 5861 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5862 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5863 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5864 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5865 5866 if (size == 1) { 5867 startsj_s = NULL; 5868 bufa_ptr = NULL; 5869 *B_oth = NULL; 5870 PetscFunctionReturn(PETSC_SUCCESS); 5871 } 5872 5873 ctx = a->Mvctx; 5874 tag = ((PetscObject)ctx)->tag; 5875 5876 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5877 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5878 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5879 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5880 PetscCall(PetscMalloc1(nreqs, &reqs)); 5881 rwaits = reqs; 5882 swaits = reqs + nrecvs; 5883 5884 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5885 if (scall == MAT_INITIAL_MATRIX) { 5886 /* i-array */ 5887 /* post receives */ 5888 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), 
&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5889 for (i = 0; i < nrecvs; i++) { 5890 rowlen = rvalues + rstarts[i] * rbs; 5891 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5892 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5893 } 5894 5895 /* pack the outgoing message */ 5896 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5897 5898 sstartsj[0] = 0; 5899 rstartsj[0] = 0; 5900 len = 0; /* total length of j or a array to be sent */ 5901 if (nsends) { 5902 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5903 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5904 } 5905 for (i = 0; i < nsends; i++) { 5906 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5907 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5908 for (j = 0; j < nrows; j++) { 5909 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5910 for (l = 0; l < sbs; l++) { 5911 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5912 5913 rowlen[j * sbs + l] = ncols; 5914 5915 len += ncols; 5916 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5917 } 5918 k++; 5919 } 5920 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5921 5922 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5923 } 5924 /* recvs and sends of i-array are completed */ 5925 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5926 PetscCall(PetscFree(svalues)); 5927 5928 /* allocate buffers for sending j and a arrays */ 5929 PetscCall(PetscMalloc1(len + 1, &bufj)); 5930 PetscCall(PetscMalloc1(len + 1, &bufa)); 5931 5932 /* create i-array of B_oth */ 5933 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5934 5935 b_othi[0] = 0; 5936 len = 0; /* total length of j or a array to be received */ 5937 k = 0; 5938 for (i = 0; i < nrecvs; i++) { 5939 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5940 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5941 for (j = 0; j < nrows; j++) { 5942 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5943 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5944 k++; 5945 } 5946 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5947 } 5948 PetscCall(PetscFree(rvalues)); 5949 5950 /* allocate space for j and a arrays of B_oth */ 5951 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5952 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5953 5954 /* j-array */ 5955 /* post receives of j-array */ 5956 for (i = 0; i < nrecvs; i++) { 5957 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5958 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5959 } 5960 5961 /* pack the outgoing message j-array */ 5962 if (nsends) k = sstarts[0]; 5963 for (i = 0; i < nsends; i++) { 5964 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5965 bufJ = bufj + sstartsj[i]; 5966 for (j = 0; j < nrows; j++) { 5967 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5968 for (ll = 0; ll < sbs; ll++) { 5969 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5970 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5971 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5972 } 5973 } 5974 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, 
swaits + i)); 5975 } 5976 5977 /* recvs and sends of j-array are completed */ 5978 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5979 } else if (scall == MAT_REUSE_MATRIX) { 5980 sstartsj = *startsj_s; 5981 rstartsj = *startsj_r; 5982 bufa = *bufa_ptr; 5983 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5984 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5985 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5986 5987 /* a-array */ 5988 /* post receives of a-array */ 5989 for (i = 0; i < nrecvs; i++) { 5990 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5991 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5992 } 5993 5994 /* pack the outgoing message a-array */ 5995 if (nsends) k = sstarts[0]; 5996 for (i = 0; i < nsends; i++) { 5997 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5998 bufA = bufa + sstartsj[i]; 5999 for (j = 0; j < nrows; j++) { 6000 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6001 for (ll = 0; ll < sbs; ll++) { 6002 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6003 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6004 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6005 } 6006 } 6007 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6008 } 6009 /* recvs and sends of a-array are completed */ 6010 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6011 PetscCall(PetscFree(reqs)); 6012 6013 if (scall == MAT_INITIAL_MATRIX) { 6014 /* put together the new matrix */ 6015 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6016 6017 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6018 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6019 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6020 b_oth->free_a = PETSC_TRUE; 6021 b_oth->free_ij = PETSC_TRUE; 6022 b_oth->nonew = 0; 6023 6024 PetscCall(PetscFree(bufj)); 6025 if (!startsj_s || !bufa_ptr) { 6026 PetscCall(PetscFree2(sstartsj, rstartsj)); 6027 PetscCall(PetscFree(bufa_ptr)); 6028 } else { 6029 *startsj_s = sstartsj; 6030 *startsj_r = rstartsj; 6031 *bufa_ptr = bufa; 6032 } 6033 } else if (scall == MAT_REUSE_MATRIX) { 6034 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6035 } 6036 6037 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6038 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6039 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6040 PetscFunctionReturn(PETSC_SUCCESS); 6041 } 6042 6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6046 #if defined(PETSC_HAVE_MKL_SPARSE) 6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6048 #endif 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6051 #if defined(PETSC_HAVE_ELEMENTAL) 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6053 #endif 6054 #if defined(PETSC_HAVE_SCALAPACK) 6055 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6056 #endif 6057 #if defined(PETSC_HAVE_HYPRE) 6058 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6059 #endif 6060 #if defined(PETSC_HAVE_CUDA) 6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6062 #endif 6063 #if defined(PETSC_HAVE_HIP) 6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6065 #endif 6066 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6068 #endif 6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6070 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6071 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6072 6073 /* 6074 Computes (B'*A')' since computing B*A directly is untenable 6075 6076 n p p 6077 [ ] [ ] [ ] 6078 m [ A ] * n [ B ] = m [ C ] 6079 [ ] [ ] [ ] 6080 6081 */ 6082 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6083 { 6084 Mat At, Bt, Ct; 6085 6086 PetscFunctionBegin; 6087 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6088 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6089 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6090 PetscCall(MatDestroy(&At)); 6091 PetscCall(MatDestroy(&Bt)); 6092 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6093 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6094 PetscCall(MatDestroy(&Ct)); 6095 PetscFunctionReturn(PETSC_SUCCESS); 6096 } 6097 6098 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6099 { 6100 PetscBool cisdense; 6101 6102 PetscFunctionBegin; 6103 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6104 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6105 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6106 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6107 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6108 PetscCall(MatSetUp(C)); 6109 6110 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6111 PetscFunctionReturn(PETSC_SUCCESS); 6112 } 6113 6114 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6115 { 6116 Mat_Product *product = C->product; 6117 Mat A = product->A, B = product->B; 6118 6119 PetscFunctionBegin; 6120 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6121 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6122 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6123 C->ops->productsymbolic = MatProductSymbolic_AB; 6124 PetscFunctionReturn(PETSC_SUCCESS); 6125 } 6126 6127 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6128 { 6129 Mat_Product *product = C->product; 6130 6131 PetscFunctionBegin; 6132 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6133 PetscFunctionReturn(PETSC_SUCCESS); 6134 } 6135 6136 /* 6137 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6138 6139 Input Parameters: 6140 6141 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6142 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6143 6144 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6145 6146 For Set1, j1[] contains column indices of the nonzeros. 6147 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6148 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6149 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6150 6151 Similar for Set2. 6152 6153 This routine merges the two sets of nonzeros row by row and removes repeats. 6154 6155 Output Parameters: (memory is allocated by the caller) 6156 6157 i[],j[]: the CSR of the merged matrix, which has m rows. 6158 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix. 6159 imap2[]: similar to imap1[], but for Set2. 6160 Note we order nonzeros row-by-row and from left to right.
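Example (an illustrative case added for clarity, not taken from the original source): with m = 1, suppose row 0 of Set1 has sorted j1[] = [2,2,5] (so jmap1 = [0,2,3]) and row 0 of Set2 has sorted j2[] = [2,7,7] (so jmap2 = [0,1,3]). The merged row is j[] = [2,5,7] with i[] = [0,3], and the unique nonzeros map as imap1 = [0,1], imap2 = [0,2].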
6161 */ 6162 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6163 { 6164 PetscInt r, m; /* Row index of mat */ 6165 PetscCount t, t1, t2, b1, e1, b2, e2; 6166 6167 PetscFunctionBegin; 6168 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6169 t1 = t2 = t = 0; /* Counters of unique nonzeros in Set1, Set2 and the merged matrix, respectively */ 6170 i[0] = 0; 6171 for (r = 0; r < m; r++) { /* Do row by row merging */ 6172 b1 = rowBegin1[r]; 6173 e1 = rowEnd1[r]; 6174 b2 = rowBegin2[r]; 6175 e2 = rowEnd2[r]; 6176 while (b1 < e1 && b2 < e2) { 6177 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6178 j[t] = j1[b1]; 6179 imap1[t1] = t; 6180 imap2[t2] = t; 6181 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6182 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6183 t1++; 6184 t2++; 6185 t++; 6186 } else if (j1[b1] < j2[b2]) { 6187 j[t] = j1[b1]; 6188 imap1[t1] = t; 6189 b1 += jmap1[t1 + 1] - jmap1[t1]; 6190 t1++; 6191 t++; 6192 } else { 6193 j[t] = j2[b2]; 6194 imap2[t2] = t; 6195 b2 += jmap2[t2 + 1] - jmap2[t2]; 6196 t2++; 6197 t++; 6198 } 6199 } 6200 /* Merge the remaining entries in either j1[] or j2[] */ 6201 while (b1 < e1) { 6202 j[t] = j1[b1]; 6203 imap1[t1] = t; 6204 b1 += jmap1[t1 + 1] - jmap1[t1]; 6205 t1++; 6206 t++; 6207 } 6208 while (b2 < e2) { 6209 j[t] = j2[b2]; 6210 imap2[t2] = t; 6211 b2 += jmap2[t2 + 1] - jmap2[t2]; 6212 t2++; 6213 t++; 6214 } 6215 i[r + 1] = t; 6216 } 6217 PetscFunctionReturn(PETSC_SUCCESS); 6218 } 6219 6220 /* 6221 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6222 6223 Input Parameters: 6224 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6225 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6226 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6227 6228 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6229 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6230 6231 Output Parameters: 6232 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6233 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6234 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6235 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6236 6237 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6238 Atot: number of entries belonging to the diagonal block. 6239 Annz: number of unique nonzeros belonging to the diagonal block. 6240 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6241 repeats (i.e., same 'i,j' pair). 6242 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t] 6243 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6244 6245 Atot: number of entries belonging to the diagonal block 6246 Annz: number of unique nonzeros belonging to the diagonal block. 6247 6248 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6249 6250 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6251 */ 6252 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6253 { 6254 PetscInt cstart, cend, rstart, rend, row, col; 6255 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6256 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6257 PetscCount k, m, p, q, r, s, mid; 6258 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6259 6260 PetscFunctionBegin; 6261 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6262 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6263 m = rend - rstart; 6264 6265 for (k = 0; k < n; k++) { 6266 if (i[k] >= 0) break; 6267 } /* Skip negative rows */ 6268 6269 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6270 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6271 */ 6272 while (k < n) { 6273 row = i[k]; 6274 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6275 for (s = k; s < n; s++) 6276 if (i[s] != row) break; 6277 for (p = k; p < s; p++) { 6278 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6279 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6280 } 6281 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6282 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6283 rowBegin[row - rstart] = k; 6284 rowMid[row - rstart] = mid; 6285 rowEnd[row - rstart] = s; 6286 6287 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6288 Atot += mid - k; 6289 Btot += s - mid; 6290 6291 /* Count unique nonzeros of this diag/offdiag row */ 6292 for (p = k; p < mid;) { 6293 col = j[p]; 6294 do { 6295 j[p] += PETSC_MAX_INT; 6296 p++; 6297 } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */ 6298 Annz++; 6299 } 6300 6301 for (p = mid; p < s;) { 6302 col = j[p]; 6303 do { 6304 p++; 6305 } while (p < s && j[p] == col); 6306 Bnnz++; 6307 } 6308 k = s; 6309 } 6310 6311 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6312 PetscCall(PetscMalloc1(Atot, &Aperm)); 6313 PetscCall(PetscMalloc1(Btot, &Bperm)); 6314 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6315 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6316 6317 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6318 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6319 for (r = 0; r < m; r++) { 6320 k = rowBegin[r]; 6321 mid = rowMid[r]; 6322 s = rowEnd[r]; 6323 
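/* Copy the permutation indices of this row's diagonal-block entries [k,mid) and off-diagonal-block entries [mid,s) into Aperm[] and Bperm[] */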
PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6324 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6325 Atot += mid - k; 6326 Btot += s - mid; 6327 6328 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6329 for (p = k; p < mid;) { 6330 col = j[p]; 6331 q = p; 6332 do { 6333 p++; 6334 } while (p < mid && j[p] == col); 6335 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6336 Annz++; 6337 } 6338 6339 for (p = mid; p < s;) { 6340 col = j[p]; 6341 q = p; 6342 do { 6343 p++; 6344 } while (p < s && j[p] == col); 6345 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6346 Bnnz++; 6347 } 6348 } 6349 /* Output */ 6350 *Aperm_ = Aperm; 6351 *Annz_ = Annz; 6352 *Atot_ = Atot; 6353 *Ajmap_ = Ajmap; 6354 *Bperm_ = Bperm; 6355 *Bnnz_ = Bnnz; 6356 *Btot_ = Btot; 6357 *Bjmap_ = Bjmap; 6358 PetscFunctionReturn(PETSC_SUCCESS); 6359 } 6360 6361 /* 6362 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6363 6364 Input Parameters: 6365 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6366 nnz: number of unique nonzeros in the merged matrix 6367 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6368 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6369 6370 Output Parameter: (memory is allocated by the caller) 6371 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6372 6373 Example: 6374 nnz1 = 4 6375 nnz = 6 6376 imap = [1,3,4,5] 6377 jmap = [0,3,5,6,7] 6378 then, 6379 jmap_new = [0,0,3,3,5,6,7] 6380 */ 6381 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6382 { 6383 PetscCount k, p; 6384 6385 PetscFunctionBegin; 6386 jmap_new[0] = 0; 6387 p = nnz; /* p loops over jmap_new[] backwards */ 6388 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6389 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6390 } 6391 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6392 PetscFunctionReturn(PETSC_SUCCESS); 6393 } 6394 6395 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6396 { 6397 MPI_Comm comm; 6398 PetscMPIInt rank, size; 6399 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6400 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6401 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6402 6403 PetscFunctionBegin; 6404 PetscCall(PetscFree(mpiaij->garray)); 6405 PetscCall(VecDestroy(&mpiaij->lvec)); 6406 #if defined(PETSC_USE_CTABLE) 6407 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6408 #else 6409 PetscCall(PetscFree(mpiaij->colmap)); 6410 #endif 6411 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6412 mat->assembled = PETSC_FALSE; 6413 mat->was_assembled = PETSC_FALSE; 6414 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6415 6416 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6417 PetscCallMPI(MPI_Comm_size(comm, &size)); 6418 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6419 PetscCall(PetscLayoutSetUp(mat->rmap)); 6420 PetscCall(PetscLayoutSetUp(mat->cmap)); 6421 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6422 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6423 PetscCall(MatGetLocalSize(mat, &m, &n)); 6424 PetscCall(MatGetSize(mat, &M, &N)); 6425 6426 /* Sort (i,j) by row along with a permutation array, so that the
to-be-ignored */ 6427 /* entries come first, then local rows, then remote rows. */ 6428 PetscCount n1 = coo_n, *perm1; 6429 PetscInt *i1 = coo_i, *j1 = coo_j; 6430 6431 PetscCall(PetscMalloc1(n1, &perm1)); 6432 for (k = 0; k < n1; k++) perm1[k] = k; 6433 6434 /* Manipulate indices so that entries with negative row or col indices will have smallest 6435 row indices, local entries will have greater but negative row indices, and remote entries 6436 will have positive row indices. 6437 */ 6438 for (k = 0; k < n1; k++) { 6439 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6440 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6441 else { 6442 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6443 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6444 } 6445 } 6446 6447 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6448 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6449 for (k = 0; k < n1; k++) { 6450 if (i1[k] > PETSC_MIN_INT) break; 6451 } /* Advance k to the first entry we need to take care of */ 6452 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6453 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6454 6455 /* Split local rows into diag/offdiag portions */ 6456 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6457 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6458 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6459 6460 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6461 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6462 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6463 6464 /* Send remote rows to their owner */ 6465 /* Find which rows should be sent to which remote ranks*/ 6466 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6467 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6468 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6469 const PetscInt *ranges; 6470 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6471 6472 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6473 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6474 for (k = rem; k < n1;) { 6475 PetscMPIInt owner; 6476 PetscInt firstRow, lastRow; 6477 6478 /* Locate a row range */ 6479 firstRow = i1[k]; /* first row of this owner */ 6480 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6481 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6482 6483 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6484 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6485 6486 /* All entries in [k,p) belong to this remote owner */ 6487 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6488 PetscMPIInt *sendto2; 6489 PetscInt *nentries2; 6490 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6491 6492 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6493 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6494 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6495 PetscCall(PetscFree2(sendto, nentries)); 6496 sendto = sendto2; 6497 nentries = nentries2; 6498 maxNsend = maxNsend2; 6499 } 6500 sendto[nsend] = owner; 6501 nentries[nsend] = p - k; 6502 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6503 nsend++; 6504 k = p; 6505 } 6506 6507 /* Build 1st SF to know offsets on remote to send data */ 6508 PetscSF sf1; 6509 PetscInt nroots = 1, nroots2 = 0; 6510 PetscInt nleaves = nsend, nleaves2 = 0; 6511 PetscInt *offsets; 6512 PetscSFNode *iremote; 6513 6514 PetscCall(PetscSFCreate(comm, &sf1)); 6515 PetscCall(PetscMalloc1(nsend, &iremote)); 6516 PetscCall(PetscMalloc1(nsend, &offsets)); 6517 for (k = 0; k < nsend; k++) { 6518 iremote[k].rank = sendto[k]; 6519 iremote[k].index = 0; 6520 nleaves2 += nentries[k]; 6521 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6522 } 6523 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6524 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6525 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6526 PetscCall(PetscSFDestroy(&sf1)); 6527 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6528 6529 /* Build 2nd SF to send remote COOs to their owner */ 6530 PetscSF sf2; 6531 nroots = nroots2; 6532 nleaves = nleaves2; 6533 PetscCall(PetscSFCreate(comm, &sf2)); 6534 PetscCall(PetscSFSetFromOptions(sf2)); 6535 PetscCall(PetscMalloc1(nleaves, &iremote)); 6536 p = 0; 6537 for (k = 0; k < nsend; k++) { 6538 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6539 for (q = 0; q < nentries[k]; q++, p++) { 6540 iremote[p].rank = sendto[k]; 6541 iremote[p].index = offsets[k] + q; 6542 } 6543 } 6544 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6545 6546 /* sf2 only sends contiguous leafdata to contiguous rootdata.
We record the permutation which will be used to fill leafdata */ 6547 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6548 6549 /* Send the remote COOs to their owner */ 6550 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6551 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6552 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6553 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6554 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6555 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6556 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6557 6558 PetscCall(PetscFree(offsets)); 6559 PetscCall(PetscFree2(sendto, nentries)); 6560 6561 /* Sort received COOs by row along with the permutation array */ 6562 for (k = 0; k < n2; k++) perm2[k] = k; 6563 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6564 6565 /* Split received COOs into diag/offdiag portions */ 6566 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6567 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6568 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6569 6570 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6571 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6572 6573 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6574 PetscInt *Ai, *Bi; 6575 PetscInt *Aj, *Bj; 6576 6577 PetscCall(PetscMalloc1(m + 1, &Ai)); 6578 PetscCall(PetscMalloc1(m + 1, &Bi)); 6579 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6580 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6581 6582 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6583 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6584 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6585 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6586 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6587 6588 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6589 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6590 6591 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6592 /* expect nonzeros in A/B most likely have local contributing entries */ 6593 PetscInt Annz = Ai[m]; 6594 PetscInt Bnnz = Bi[m]; 6595 PetscCount *Ajmap1_new, *Bjmap1_new; 6596 6597 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6598 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6599 6600 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6601 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6602 6603 PetscCall(PetscFree(Aimap1)); 6604 PetscCall(PetscFree(Ajmap1)); 6605 PetscCall(PetscFree(Bimap1)); 6606 PetscCall(PetscFree(Bjmap1)); 6607 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6608 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6609 PetscCall(PetscFree(perm1)); 6610 PetscCall(PetscFree3(i2, j2, perm2)); 6611 6612 Ajmap1 = Ajmap1_new; 6613 Bjmap1 = Bjmap1_new; 6614 6615 /* Reallocate Aj, Bj once we know actual numbers of 
unique nonzeros in A and B */ 6616 if (Annz < Annz1 + Annz2) { 6617 PetscInt *Aj_new; 6618 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6619 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6620 PetscCall(PetscFree(Aj)); 6621 Aj = Aj_new; 6622 } 6623 6624 if (Bnnz < Bnnz1 + Bnnz2) { 6625 PetscInt *Bj_new; 6626 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6627 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6628 PetscCall(PetscFree(Bj)); 6629 Bj = Bj_new; 6630 } 6631 6632 /* Create new submatrices for on-process and off-process coupling */ 6633 PetscScalar *Aa, *Ba; 6634 MatType rtype; 6635 Mat_SeqAIJ *a, *b; 6636 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6637 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6638 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6639 if (cstart) { 6640 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6641 } 6642 PetscCall(MatDestroy(&mpiaij->A)); 6643 PetscCall(MatDestroy(&mpiaij->B)); 6644 PetscCall(MatGetRootType_Private(mat, &rtype)); 6645 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6646 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6647 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6648 6649 a = (Mat_SeqAIJ *)mpiaij->A->data; 6650 b = (Mat_SeqAIJ *)mpiaij->B->data; 6651 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6652 a->free_a = b->free_a = PETSC_TRUE; 6653 a->free_ij = b->free_ij = PETSC_TRUE; 6654 6655 /* conversion must happen AFTER multiply setup */ 6656 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6657 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6658 PetscCall(VecDestroy(&mpiaij->lvec)); 6659 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6660 6661 mpiaij->coo_n = coo_n; 6662 mpiaij->coo_sf = sf2; 6663 mpiaij->sendlen = nleaves; 6664 mpiaij->recvlen = nroots; 6665 6666 mpiaij->Annz = Annz; 6667 mpiaij->Bnnz = Bnnz; 6668 6669 mpiaij->Annz2 = Annz2; 6670 mpiaij->Bnnz2 = Bnnz2; 6671 6672 mpiaij->Atot1 = Atot1; 6673 mpiaij->Atot2 = Atot2; 6674 mpiaij->Btot1 = Btot1; 6675 mpiaij->Btot2 = Btot2; 6676 6677 mpiaij->Ajmap1 = Ajmap1; 6678 mpiaij->Aperm1 = Aperm1; 6679 6680 mpiaij->Bjmap1 = Bjmap1; 6681 mpiaij->Bperm1 = Bperm1; 6682 6683 mpiaij->Aimap2 = Aimap2; 6684 mpiaij->Ajmap2 = Ajmap2; 6685 mpiaij->Aperm2 = Aperm2; 6686 6687 mpiaij->Bimap2 = Bimap2; 6688 mpiaij->Bjmap2 = Bjmap2; 6689 mpiaij->Bperm2 = Bperm2; 6690 6691 mpiaij->Cperm1 = Cperm1; 6692 6693 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6694 PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf)); 6695 PetscFunctionReturn(PETSC_SUCCESS); 6696 } 6697 6698 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6699 { 6700 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6701 Mat A = mpiaij->A, B = mpiaij->B; 6702 PetscCount Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2; 6703 PetscScalar *Aa, *Ba; 6704 PetscScalar *sendbuf = mpiaij->sendbuf; 6705 PetscScalar *recvbuf = mpiaij->recvbuf; 6706 const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2; 6707 const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2; 6708 const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2; 6709 const PetscCount *Cperm1 = mpiaij->Cperm1; 6710 6711 PetscFunctionBegin; 6712 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6713 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6714 6715 /* Pack entries to be sent to remote */ 6716 for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6717 6718 /* Send remote entries to their owner and overlap the communication with local computation */ 6719 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6720 /* Add local entries to A and B */ 6721 for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6722 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6723 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6724 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6725 } 6726 for (PetscCount i = 0; i < Bnnz; i++) { 6727 PetscScalar sum = 0.0; 6728 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6729 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6730 } 6731 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6732 6733 /* Add received remote entries to A and B */ 6734 for (PetscCount i = 0; i < Annz2; i++) { 6735 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6736 } 6737 for (PetscCount i = 0; i < Bnnz2; i++) { 6738 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6739 } 6740 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6741 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6742 PetscFunctionReturn(PETSC_SUCCESS); 6743 } 6744 6745 /*MC 6746 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6747 6748 Options Database Keys: 6749 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6750 6751 Level: beginner 6752 6753 Notes: 6754 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6755 in this case the values associated with the rows and columns one passes in are set to zero 6756 in the matrix 6757 6758 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this case, no 6759 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6760 6761 .seealso: [](chapter_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6762 M*/ 6763 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6764 { 6765 Mat_MPIAIJ *b; 6766 PetscMPIInt size; 6767 6768 PetscFunctionBegin; 6769 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6770 6771 PetscCall(PetscNew(&b)); 6772 B->data = (void *)b; 6773 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 6774 B->assembled = PETSC_FALSE; 6775 B->insertmode = NOT_SET_VALUES; 6776 b->size = size; 6777 6778 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6779 6780 /* build cache for off array entries formed */ 6781 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6782 6783 b->donotstash = PETSC_FALSE; 6784 b->colmap = NULL; 6785 b->garray = NULL; 6786 b->roworiented = PETSC_TRUE; 6787 6788 /* stuff used for matrix vector multiply */ 6789 b->lvec = NULL; 6790 b->Mvctx = NULL; 6791 6792 /* stuff for MatGetRow() */ 6793 b->rowindices = NULL; 6794 b->rowvalues = NULL; 6795 b->getrowactive = PETSC_FALSE; 6796 6797 /* flexible pointer used in CUSPARSE classes */ 6798 b->spptr = NULL; 6799 6800 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6801 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6802 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6803 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6804 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6805 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6806 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6807 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6808 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6809 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6810 #if defined(PETSC_HAVE_CUDA) 6811 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6812 #endif 6813 #if defined(PETSC_HAVE_HIP) 6814 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6815 #endif 6816 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6817 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6818 #endif 6819 #if defined(PETSC_HAVE_MKL_SPARSE) 6820 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6821 #endif 6822 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6823 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6824 PetscCall(PetscObjectComposeFunction((PetscObject)B,
"MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6825 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6826 #if defined(PETSC_HAVE_ELEMENTAL) 6827 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6828 #endif 6829 #if defined(PETSC_HAVE_SCALAPACK) 6830 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6831 #endif 6832 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6833 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6834 #if defined(PETSC_HAVE_HYPRE) 6835 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6836 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6837 #endif 6838 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6839 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6840 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6842 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6843 PetscFunctionReturn(PETSC_SUCCESS); 6844 } 6845 6846 /*@C 6847 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6848 and "off-diagonal" part of the matrix in CSR format. 6849 6850 Collective 6851 6852 Input Parameters: 6853 + comm - MPI communicator 6854 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6855 . n - This value should be the same as the local size used in creating the 6856 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6857 calculated if `N` is given) For square matrices `n` is almost always `m`. 6858 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6859 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6860 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6861 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6862 . a - matrix values 6863 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6864 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6865 - oa - matrix values 6866 6867 Output Parameter: 6868 . mat - the matrix 6869 6870 Level: advanced 6871 6872 Notes: 6873 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6874 must free the arrays once the matrix has been destroyed and not before. 6875 6876 The `i` and `j` indices are 0 based 6877 6878 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6879 6880 This sets local rows and cannot be used to set off-processor values. 
6881 6882 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6883 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6884 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6885 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6886 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6887 communication if it is known that only local entries will be set. 6888 6889 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6890 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6891 @*/ 6892 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6893 { 6894 Mat_MPIAIJ *maij; 6895 6896 PetscFunctionBegin; 6897 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6898 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6899 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6900 PetscCall(MatCreate(comm, mat)); 6901 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6902 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6903 maij = (Mat_MPIAIJ *)(*mat)->data; 6904 6905 (*mat)->preallocated = PETSC_TRUE; 6906 6907 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6908 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6909 6910 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6911 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6912 6913 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6914 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6915 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6916 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6917 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6918 PetscFunctionReturn(PETSC_SUCCESS); 6919 } 6920 6921 typedef struct { 6922 Mat *mp; /* intermediate products */ 6923 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6924 PetscInt cp; /* number of intermediate products */ 6925 6926 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6927 PetscInt *startsj_s, *startsj_r; 6928 PetscScalar *bufa; 6929 Mat P_oth; 6930 6931 /* may take advantage of merging product->B */ 6932 Mat Bloc; /* B-local by merging diag and off-diag */ 6933 6934 /* cusparse does not have support to split between symbolic and numeric phases. 6935 When api_user is true, we don't need to update the numerical values 6936 of the temporary storage */ 6937 PetscBool reusesym; 6938 6939 /* support for COO values insertion */ 6940 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6941 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6942 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6943 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6944 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6945 PetscMemType mtype; 6946 6947 /* customization */ 6948 PetscBool abmerge; 6949 PetscBool P_oth_bind; 6950 } MatMatMPIAIJBACKEND; 6951 6952 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6953 { 6954 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6955 PetscInt i; 6956 6957 PetscFunctionBegin; 6958 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6959 PetscCall(PetscFree(mmdata->bufa)); 6960 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6961 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6962 PetscCall(MatDestroy(&mmdata->P_oth)); 6963 PetscCall(MatDestroy(&mmdata->Bloc)); 6964 PetscCall(PetscSFDestroy(&mmdata->sf)); 6965 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 6966 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 6967 PetscCall(PetscFree(mmdata->own[0])); 6968 PetscCall(PetscFree(mmdata->own)); 6969 PetscCall(PetscFree(mmdata->off[0])); 6970 PetscCall(PetscFree(mmdata->off)); 6971 PetscCall(PetscFree(mmdata)); 6972 PetscFunctionReturn(PETSC_SUCCESS); 6973 } 6974 6975 /* Copy selected n entries with indices in idx[] of A to v[]. 6976 If idx is NULL, copy the whole data array of A to v[] 6977 */ 6978 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6979 { 6980 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 6981 6982 PetscFunctionBegin; 6983 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 6984 if (f) { 6985 PetscCall((*f)(A, n, idx, v)); 6986 } else { 6987 const PetscScalar *vv; 6988 6989 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 6990 if (n && idx) { 6991 PetscScalar *w = v; 6992 const PetscInt *oi = idx; 6993 PetscInt j; 6994 6995 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6996 } else { 6997 PetscCall(PetscArraycpy(v, vv, n)); 6998 } 6999 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7000 } 7001 PetscFunctionReturn(PETSC_SUCCESS); 7002 } 7003 7004 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7005 { 7006 MatMatMPIAIJBACKEND *mmdata; 7007 PetscInt i, n_d, n_o; 7008 7009 PetscFunctionBegin; 7010 MatCheckProduct(C, 1); 7011 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7012 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7013 if (!mmdata->reusesym) { /* update temporary matrices */ 7014 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7015 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7016 } 7017 mmdata->reusesym = PETSC_FALSE; 7018 7019 for (i = 0; i < mmdata->cp; i++) { 7020 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7021 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7022 } 7023 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7024 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7025 7026 if (mmdata->mptmp[i]) continue; 7027 if (noff) { 7028 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7029 7030 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7031 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, 
mmdata->own[i], mmdata->coo_v + n_d)); 7032 n_o += noff; 7033 n_d += nown; 7034 } else { 7035 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7036 7037 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7038 n_d += mm->nz; 7039 } 7040 } 7041 if (mmdata->hasoffproc) { /* offprocess insertion */ 7042 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7043 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7044 } 7045 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7046 PetscFunctionReturn(PETSC_SUCCESS); 7047 } 7048 7049 /* Support for Pt * A, A * P, or Pt * A * P */ 7050 #define MAX_NUMBER_INTERMEDIATE 4 7051 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7052 { 7053 Mat_Product *product = C->product; 7054 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7055 Mat_MPIAIJ *a, *p; 7056 MatMatMPIAIJBACKEND *mmdata; 7057 ISLocalToGlobalMapping P_oth_l2g = NULL; 7058 IS glob = NULL; 7059 const char *prefix; 7060 char pprefix[256]; 7061 const PetscInt *globidx, *P_oth_idx; 7062 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7063 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7064 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7065 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7066 /* a base offset; type-2: sparse with a local to global map table */ 7067 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7068 7069 MatProductType ptype; 7070 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7071 PetscMPIInt size; 7072 7073 PetscFunctionBegin; 7074 MatCheckProduct(C, 1); 7075 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7076 ptype = product->type; 7077 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7078 ptype = MATPRODUCT_AB; 7079 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7080 } 7081 switch (ptype) { 7082 case MATPRODUCT_AB: 7083 A = product->A; 7084 P = product->B; 7085 m = A->rmap->n; 7086 n = P->cmap->n; 7087 M = A->rmap->N; 7088 N = P->cmap->N; 7089 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7090 break; 7091 case MATPRODUCT_AtB: 7092 P = product->A; 7093 A = product->B; 7094 m = P->cmap->n; 7095 n = A->cmap->n; 7096 M = P->cmap->N; 7097 N = A->cmap->N; 7098 hasoffproc = PETSC_TRUE; 7099 break; 7100 case MATPRODUCT_PtAP: 7101 A = product->A; 7102 P = product->B; 7103 m = P->cmap->n; 7104 n = P->cmap->n; 7105 M = P->cmap->N; 7106 N = P->cmap->N; 7107 hasoffproc = PETSC_TRUE; 7108 break; 7109 default: 7110 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7111 } 7112 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7113 if (size == 1) hasoffproc = PETSC_FALSE; 7114 7115 /* defaults */ 7116 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7117 mp[i] = NULL; 7118 mptmp[i] = PETSC_FALSE; 7119 rmapt[i] = -1; 7120 cmapt[i] = -1; 7121 rmapa[i] = NULL; 7122 cmapa[i] = NULL; 7123 } 7124 7125 /* customization */ 7126 PetscCall(PetscNew(&mmdata)); 7127 mmdata->reusesym = product->api_user; 7128 if (ptype == MATPRODUCT_AB) { 7129 if (product->api_user) { 7130 
PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7131 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7132 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7133 PetscOptionsEnd(); 7134 } else { 7135 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7136 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7137 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7138 PetscOptionsEnd(); 7139 } 7140 } else if (ptype == MATPRODUCT_PtAP) { 7141 if (product->api_user) { 7142 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7143 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7144 PetscOptionsEnd(); 7145 } else { 7146 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7147 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7148 PetscOptionsEnd(); 7149 } 7150 } 7151 a = (Mat_MPIAIJ *)A->data; 7152 p = (Mat_MPIAIJ *)P->data; 7153 PetscCall(MatSetSizes(C, m, n, M, N)); 7154 PetscCall(PetscLayoutSetUp(C->rmap)); 7155 PetscCall(PetscLayoutSetUp(C->cmap)); 7156 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7157 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7158 7159 cp = 0; 7160 switch (ptype) { 7161 case MATPRODUCT_AB: /* A * P */ 7162 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7163 7164 /* A_diag * P_local (merged or not) */ 7165 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7166 /* P is product->B */ 7167 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7168 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7169 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7170 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7171 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7172 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7173 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7174 mp[cp]->product->api_user = product->api_user; 7175 PetscCall(MatProductSetFromOptions(mp[cp])); 7176 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7177 PetscCall(ISGetIndices(glob, &globidx)); 7178 rmapt[cp] = 1; 7179 cmapt[cp] = 2; 7180 cmapa[cp] = globidx; 7181 mptmp[cp] = PETSC_FALSE; 7182 cp++; 7183 } else { /* A_diag * P_diag and A_diag * P_off */ 7184 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7185 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7186 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7187 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7188 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7189 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7190 mp[cp]->product->api_user = 
product->api_user; 7191 PetscCall(MatProductSetFromOptions(mp[cp])); 7192 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7193 rmapt[cp] = 1; 7194 cmapt[cp] = 1; 7195 mptmp[cp] = PETSC_FALSE; 7196 cp++; 7197 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7198 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7199 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7200 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7201 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7202 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7203 mp[cp]->product->api_user = product->api_user; 7204 PetscCall(MatProductSetFromOptions(mp[cp])); 7205 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7206 rmapt[cp] = 1; 7207 cmapt[cp] = 2; 7208 cmapa[cp] = p->garray; 7209 mptmp[cp] = PETSC_FALSE; 7210 cp++; 7211 } 7212 7213 /* A_off * P_other */ 7214 if (mmdata->P_oth) { 7215 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7216 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7217 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7218 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7219 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7220 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7221 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7222 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7223 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7224 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7225 mp[cp]->product->api_user = product->api_user; 7226 PetscCall(MatProductSetFromOptions(mp[cp])); 7227 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7228 rmapt[cp] = 1; 7229 cmapt[cp] = 2; 7230 cmapa[cp] = P_oth_idx; 7231 mptmp[cp] = PETSC_FALSE; 7232 cp++; 7233 } 7234 break; 7235 7236 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7237 /* A is product->B */ 7238 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7239 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7240 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7241 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7242 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7243 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7244 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7245 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7246 mp[cp]->product->api_user = product->api_user; 7247 PetscCall(MatProductSetFromOptions(mp[cp])); 7248 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7249 PetscCall(ISGetIndices(glob, &globidx)); 7250 rmapt[cp] = 2; 7251 rmapa[cp] = globidx; 7252 cmapt[cp] = 2; 7253 cmapa[cp] = globidx; 7254 mptmp[cp] = PETSC_FALSE; 7255 cp++; 7256 } else { 7257 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7258 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7259 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7260 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7261 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7262 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7263 mp[cp]->product->api_user = product->api_user; 7264 PetscCall(MatProductSetFromOptions(mp[cp])); 7265 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7266 PetscCall(ISGetIndices(glob, &globidx)); 
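      /* P_diag^T * A_loc: the rows of this intermediate product are C's locally owned rows (consecutive with an offset, type-1), while its columns are those of the merged local part of A and are mapped to global indices through glob (type-2) */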
7267 rmapt[cp] = 1; 7268 cmapt[cp] = 2; 7269 cmapa[cp] = globidx; 7270 mptmp[cp] = PETSC_FALSE; 7271 cp++; 7272 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7273 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7274 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7275 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7276 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7277 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7278 mp[cp]->product->api_user = product->api_user; 7279 PetscCall(MatProductSetFromOptions(mp[cp])); 7280 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7281 rmapt[cp] = 2; 7282 rmapa[cp] = p->garray; 7283 cmapt[cp] = 2; 7284 cmapa[cp] = globidx; 7285 mptmp[cp] = PETSC_FALSE; 7286 cp++; 7287 } 7288 break; 7289 case MATPRODUCT_PtAP: 7290 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7291 /* P is product->B */ 7292 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7293 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7294 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7295 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7296 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7297 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7298 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7299 mp[cp]->product->api_user = product->api_user; 7300 PetscCall(MatProductSetFromOptions(mp[cp])); 7301 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7302 PetscCall(ISGetIndices(glob, &globidx)); 7303 rmapt[cp] = 2; 7304 rmapa[cp] = globidx; 7305 cmapt[cp] = 2; 7306 cmapa[cp] = globidx; 7307 mptmp[cp] = PETSC_FALSE; 7308 cp++; 7309 if (mmdata->P_oth) { 7310 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7311 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7312 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7313 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7314 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7315 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7316 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7317 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7318 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7319 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7320 mp[cp]->product->api_user = product->api_user; 7321 PetscCall(MatProductSetFromOptions(mp[cp])); 7322 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7323 mptmp[cp] = PETSC_TRUE; 7324 cp++; 7325 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7326 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7327 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7328 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7329 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7330 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7331 mp[cp]->product->api_user = product->api_user; 7332 PetscCall(MatProductSetFromOptions(mp[cp])); 7333 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7334 rmapt[cp] = 2; 7335 rmapa[cp] = globidx; 7336 cmapt[cp] = 2; 7337 cmapa[cp] = P_oth_idx; 7338 mptmp[cp] = PETSC_FALSE; 7339 cp++; 7340 } 7341 break; 7342 default: 7343 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", 
MatProductTypes[ptype]); 7344 } 7345 /* sanity check */ 7346 if (size > 1) 7347 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7348 7349 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7350 for (i = 0; i < cp; i++) { 7351 mmdata->mp[i] = mp[i]; 7352 mmdata->mptmp[i] = mptmp[i]; 7353 } 7354 mmdata->cp = cp; 7355 C->product->data = mmdata; 7356 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7357 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7358 7359 /* memory type */ 7360 mmdata->mtype = PETSC_MEMTYPE_HOST; 7361 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7362 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7363 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7364 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7365 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7366 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7367 7368 /* prepare COO coordinates for value insertion */ 7369 7370 /* count the total nonzeros of the intermediate seqaij Mats 7371 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7372 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted on remote procs 7373 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7374 */ 7375 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7376 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7377 if (mptmp[cp]) continue; 7378 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (possibly including self) */ 7379 const PetscInt *rmap = rmapa[cp]; 7380 const PetscInt mr = mp[cp]->rmap->n; 7381 const PetscInt rs = C->rmap->rstart; 7382 const PetscInt re = C->rmap->rend; 7383 const PetscInt *ii = mm->i; 7384 for (i = 0; i < mr; i++) { 7385 const PetscInt gr = rmap[i]; 7386 const PetscInt nz = ii[i + 1] - ii[i]; 7387 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7388 else ncoo_oown += nz; /* this row is local */ 7389 } 7390 } else ncoo_d += mm->nz; 7391 } 7392 7393 /* 7394 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7395 7396 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into my rows by other procs. 7397 7398 off[0] points to a big index array, which is shared by off[1,2,...]; similarly for own[0]. 7399 7400 off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other processes 7401 own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally 7402 Thus off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other processes. 7403 7404 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7405 E.g., coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores the row indices of locally inserted nonzeros, and the remaining part stores the row indices of nonzeros received from other procs. 
7406 */ 7407 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7408 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7409 7410 /* gather (i,j) of nonzeros inserted by remote procs */ 7411 if (hasoffproc) { 7412 PetscSF msf; 7413 PetscInt ncoo2, *coo_i2, *coo_j2; 7414 7415 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7416 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7417 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7418 7419 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7420 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7421 PetscInt *idxoff = mmdata->off[cp]; 7422 PetscInt *idxown = mmdata->own[cp]; 7423 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7424 const PetscInt *rmap = rmapa[cp]; 7425 const PetscInt *cmap = cmapa[cp]; 7426 const PetscInt *ii = mm->i; 7427 PetscInt *coi = coo_i + ncoo_o; 7428 PetscInt *coj = coo_j + ncoo_o; 7429 const PetscInt mr = mp[cp]->rmap->n; 7430 const PetscInt rs = C->rmap->rstart; 7431 const PetscInt re = C->rmap->rend; 7432 const PetscInt cs = C->cmap->rstart; 7433 for (i = 0; i < mr; i++) { 7434 const PetscInt *jj = mm->j + ii[i]; 7435 const PetscInt gr = rmap[i]; 7436 const PetscInt nz = ii[i + 1] - ii[i]; 7437 if (gr < rs || gr >= re) { /* this is an offproc row */ 7438 for (j = ii[i]; j < ii[i + 1]; j++) { 7439 *coi++ = gr; 7440 *idxoff++ = j; 7441 } 7442 if (!cmapt[cp]) { /* already global */ 7443 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7444 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7445 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7446 } else { /* offdiag */ 7447 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7448 } 7449 ncoo_o += nz; 7450 } else { /* this is a local row */ 7451 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7452 } 7453 } 7454 } 7455 mmdata->off[cp + 1] = idxoff; 7456 mmdata->own[cp + 1] = idxown; 7457 } 7458 7459 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7460 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7461 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7462 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7463 ncoo = ncoo_d + ncoo_oown + ncoo2; 7464 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7465 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7466 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7467 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7468 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7469 PetscCall(PetscFree2(coo_i, coo_j)); 7470 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7471 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7472 coo_i = coo_i2; 7473 coo_j = coo_j2; 7474 } else { /* no offproc values insertion */ 7475 ncoo = ncoo_d; 7476 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7477 7478 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7479 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7480 PetscCall(PetscSFSetUp(mmdata->sf)); 7481 } 7482 mmdata->hasoffproc = hasoffproc; 7483 7484 /* gather (i,j) of nonzeros 
inserted locally */ 7485 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7486 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7487 PetscInt *coi = coo_i + ncoo_d; 7488 PetscInt *coj = coo_j + ncoo_d; 7489 const PetscInt *jj = mm->j; 7490 const PetscInt *ii = mm->i; 7491 const PetscInt *cmap = cmapa[cp]; 7492 const PetscInt *rmap = rmapa[cp]; 7493 const PetscInt mr = mp[cp]->rmap->n; 7494 const PetscInt rs = C->rmap->rstart; 7495 const PetscInt re = C->rmap->rend; 7496 const PetscInt cs = C->cmap->rstart; 7497 7498 if (mptmp[cp]) continue; 7499 if (rmapt[cp] == 1) { /* consecutive rows */ 7500 /* fill coo_i */ 7501 for (i = 0; i < mr; i++) { 7502 const PetscInt gr = i + rs; 7503 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7504 } 7505 /* fill coo_j */ 7506 if (!cmapt[cp]) { /* type-0, already global */ 7507 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7508 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7509 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7510 } else { /* type-2, local to global for sparse columns */ 7511 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7512 } 7513 ncoo_d += mm->nz; 7514 } else if (rmapt[cp] == 2) { /* sparse rows */ 7515 for (i = 0; i < mr; i++) { 7516 const PetscInt *jj = mm->j + ii[i]; 7517 const PetscInt gr = rmap[i]; 7518 const PetscInt nz = ii[i + 1] - ii[i]; 7519 if (gr >= rs && gr < re) { /* local rows */ 7520 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7521 if (!cmapt[cp]) { /* type-0, already global */ 7522 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7523 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7524 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7525 } else { /* type-2, local to global for sparse columns */ 7526 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7527 } 7528 ncoo_d += nz; 7529 } 7530 } 7531 } 7532 } 7533 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7534 PetscCall(ISDestroy(&glob)); 7535 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7536 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7537 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7538 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7539 7540 /* preallocate with COO data */ 7541 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7542 PetscCall(PetscFree2(coo_i, coo_j)); 7543 PetscFunctionReturn(PETSC_SUCCESS); 7544 } 7545 7546 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7547 { 7548 Mat_Product *product = mat->product; 7549 #if defined(PETSC_HAVE_DEVICE) 7550 PetscBool match = PETSC_FALSE; 7551 PetscBool usecpu = PETSC_FALSE; 7552 #else 7553 PetscBool match = PETSC_TRUE; 7554 #endif 7555 7556 PetscFunctionBegin; 7557 MatCheckProduct(mat, 1); 7558 #if defined(PETSC_HAVE_DEVICE) 7559 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7560 if (match) { /* we can always fallback to the CPU if requested */ 7561 switch (product->type) { 7562 case MATPRODUCT_AB: 7563 if (product->api_user) { 7564 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7565 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7566 PetscOptionsEnd(); 7567 } else { 7568 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7569 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7570 PetscOptionsEnd(); 7571 } 7572 break; 7573 case MATPRODUCT_AtB: 7574 if (product->api_user) { 7575 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7576 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7577 PetscOptionsEnd(); 7578 } else { 7579 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7580 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7581 PetscOptionsEnd(); 7582 } 7583 break; 7584 case MATPRODUCT_PtAP: 7585 if (product->api_user) { 7586 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7587 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7588 PetscOptionsEnd(); 7589 } else { 7590 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7591 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7592 PetscOptionsEnd(); 7593 } 7594 break; 7595 default: 7596 break; 7597 } 7598 match = (PetscBool)!usecpu; 7599 } 7600 #endif 7601 if (match) { 7602 switch (product->type) { 7603 case MATPRODUCT_AB: 7604 case MATPRODUCT_AtB: 7605 case MATPRODUCT_PtAP: 7606 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7607 break; 7608 default: 7609 break; 7610 } 7611 } 7612 /* fallback to MPIAIJ ops */ 7613 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7614 PetscFunctionReturn(PETSC_SUCCESS); 7615 } 7616 7617 /* 7618 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7619 7620 n - the number of block indices in cc[] 7621 cc - the block indices (must be large enough to contain the indices) 7622 */ 7623 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7624 { 7625 PetscInt cnt = -1, nidx, j; 7626 const PetscInt *idx; 7627 7628 PetscFunctionBegin; 7629 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7630 if (nidx) { 7631 cnt = 0; 7632 cc[cnt] = idx[0] / bs; 7633 for (j = 1; j < nidx; j++) { 7634 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7635 } 7636 } 7637 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7638 *n = cnt + 1; 7639 PetscFunctionReturn(PETSC_SUCCESS); 7640 } 7641 7642 /* 7643 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7644 7645 ncollapsed - the number of block indices 7646 collapsed - the block indices (must be large enough to contain the indices) 7647 */ 7648 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7649 { 7650 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7651 7652 PetscFunctionBegin; 7653 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7654 for (i = start + 1; i < start + bs; i++) { 7655 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 
7656 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7657 cprevtmp = cprev; 7658 cprev = merged; 7659 merged = cprevtmp; 7660 } 7661 *ncollapsed = nprev; 7662 if (collapsed) *collapsed = cprev; 7663 PetscFunctionReturn(PETSC_SUCCESS); 7664 } 7665 7666 /* 7667 This will eventually be folded into MatCreateGraph_AIJ() for optimal performance 7668 */ 7669 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) 7670 { 7671 PetscInt Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; 7672 Mat tGmat; 7673 MPI_Comm comm; 7674 const PetscScalar *vals; 7675 const PetscInt *idx; 7676 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0; 7677 MatScalar *AA; // this is checked in graph 7678 PetscBool isseqaij; 7679 Mat a, b, c; 7680 MatType jtype; 7681 7682 PetscFunctionBegin; 7683 PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm)); 7684 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij)); 7685 PetscCall(MatGetType(Gmat, &jtype)); 7686 PetscCall(MatCreate(comm, &tGmat)); 7687 PetscCall(MatSetType(tGmat, jtype)); 7688 7689 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7690 Also, if the matrix is symmetric, can we skip this 7691 operation? It can be very expensive on large matrices. */ 7692 7693 // global sizes 7694 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7695 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7696 nloc = Iend - Istart; 7697 PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz)); 7698 if (isseqaij) { 7699 a = Gmat; 7700 b = NULL; 7701 } else { 7702 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7703 a = d->A; 7704 b = d->B; 7705 garray = d->garray; 7706 } 7707 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7708 for (PetscInt row = 0; row < nloc; row++) { 7709 PetscCall(MatGetRow(a, row, &ncols, NULL, NULL)); 7710 d_nnz[row] = ncols; 7711 if (ncols > maxcols) maxcols = ncols; 7712 PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL)); 7713 } 7714 if (b) { 7715 for (PetscInt row = 0; row < nloc; row++) { 7716 PetscCall(MatGetRow(b, row, &ncols, NULL, NULL)); 7717 o_nnz[row] = ncols; 7718 if (ncols > maxcols) maxcols = ncols; 7719 PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL)); 7720 } 7721 } 7722 PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM)); 7723 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7724 PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz)); 7725 PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz)); 7726 PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7727 PetscCall(PetscFree2(d_nnz, o_nnz)); 7728 // 7729 PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); 7730 nnz0 = nnz1 = 0; 7731 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7732 for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) { 7733 PetscCall(MatGetRow(c, row, &ncols, &idx, &vals)); 7734 for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) { 7735 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7736 if (PetscRealPart(sv) > vfilter) { 7737 nnz1++; 7738 PetscInt cid = idx[jj] + Istart; //diag 7739 if (c != a) cid = garray[idx[jj]]; 7740 AA[ncol_row] = vals[jj]; 7741 AJ[ncol_row] = cid; 7742 ncol_row++; 7743 } 7744 } 7745 PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals)); 7746 PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES)); 7747 } 7748 } 7749 PetscCall(PetscFree2(AA, AJ)); 7750 PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY)); 
7751 PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY)); 7752 PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */ 7753 7754 PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols)); 7755 7756 *filteredG = tGmat; 7757 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7758 PetscFunctionReturn(PETSC_SUCCESS); 7759 } 7760 7761 /* 7762 MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix 7763 7764 Input Parameters: 7765 + Amat - matrix 7766 . symmetrize - make the result symmetric 7767 . scale - scale with the diagonal - filter - filter threshold; entries whose absolute value does not exceed it are dropped (negative means no filtering) 7768 7769 Output Parameter: 7770 . a_Gmat - output scalar graph with entries >= 0 7771 7772 */ 7773 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7774 { 7775 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7776 MPI_Comm comm; 7777 Mat Gmat; 7778 PetscBool ismpiaij, isseqaij; 7779 Mat a, b, c; 7780 MatType jtype; 7781 7782 PetscFunctionBegin; 7783 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7784 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7785 PetscCall(MatGetSize(Amat, &MM, &NN)); 7786 PetscCall(MatGetBlockSize(Amat, &bs)); 7787 nloc = (Iend - Istart) / bs; 7788 7789 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7790 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7791 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7792 7793 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7794 /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, so that each class can provide a fast 7795 implementation */ 7796 if (bs > 1) { 7797 PetscCall(MatGetType(Amat, &jtype)); 7798 PetscCall(MatCreate(comm, &Gmat)); 7799 PetscCall(MatSetType(Gmat, jtype)); 7800 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7801 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7802 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7803 PetscInt *d_nnz, *o_nnz; 7804 MatScalar *aa, val, *AA; 7805 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7806 if (isseqaij) { 7807 a = Amat; 7808 b = NULL; 7809 } else { 7810 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7811 a = d->A; 7812 b = d->B; 7813 } 7814 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7815 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7816 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7817 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7818 const PetscInt *cols1, *cols2; 7819 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7820 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7821 nnz[brow / bs] = nc2 / bs; 7822 if (nc2 % bs) ok = 0; 7823 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7824 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7825 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7826 if (nc1 != nc2) ok = 0; 7827 else { 7828 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7829 if (cols1[jj] != cols2[jj]) ok = 0; 7830 if (cols1[jj] % bs != jj % bs) ok = 0; 7831 } 7832 } 7833 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7834 } 7835 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7836 if (!ok) { 7837 PetscCall(PetscFree2(d_nnz, o_nnz)); 7838 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7839 goto old_bs; 7840 } 7841 } 7842 } 7843 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7844 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7845 PetscCall(PetscFree2(d_nnz, o_nnz)); 7846 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7847 // diag 7848 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7849 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7850 ai = aseq->i; 7851 n = ai[brow + 1] - ai[brow]; 7852 aj = aseq->j + ai[brow]; 7853 for (int k = 0; k < n; k += bs) { // block columns 7854 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7855 val = 0; 7856 for (int ii = 0; ii < bs; ii++) { // rows in block 7857 aa = aseq->a + ai[brow + ii] + k; 7858 for (int jj = 0; jj < bs; jj++) { // columns in block 7859 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7860 } 7861 } 7862 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7863 AA[k / bs] = val; 7864 } 7865 grow = Istart / bs + brow / bs; 7866 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7867 } 7868 // off-diag 7869 if (ismpiaij) { 7870 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7871 const PetscScalar *vals; 7872 const PetscInt *cols, *garray = aij->garray; 7873 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7874 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7875 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7876 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7877 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7878 AA[k / bs] = 0; 7879 AJ[cidx] = garray[cols[k]] / bs; 7880 } 7881 nc = ncols / bs; 7882 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7883 for (int ii = 0; ii < bs; ii++) { // rows in block 7884 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7885 for (int k = 0; k < ncols; k += bs) { 7886 for (int jj = 0; jj < bs; jj++) { // cols in block 7887 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7888 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7889 } 7890 } 7891 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7892 } 7893 grow = Istart / bs + brow / bs; 7894 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7895 } 7896 } 7897 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7898 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7899 PetscCall(PetscFree2(AA, AJ)); 7900 } else { 7901 const PetscScalar *vals; 7902 const PetscInt *idx; 7903 PetscInt *d_nnz, 
*o_nnz, *w0, *w1, *w2; 7904 old_bs: 7905 /* 7906 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7907 */ 7908 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7909 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7910 if (isseqaij) { 7911 PetscInt max_d_nnz; 7912 /* 7913 Determine exact preallocation count for (sequential) scalar matrix 7914 */ 7915 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7916 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7917 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7918 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7919 PetscCall(PetscFree3(w0, w1, w2)); 7920 } else if (ismpiaij) { 7921 Mat Daij, Oaij; 7922 const PetscInt *garray; 7923 PetscInt max_d_nnz; 7924 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7925 /* 7926 Determine exact preallocation count for diagonal block portion of scalar matrix 7927 */ 7928 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7929 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7930 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7931 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7932 PetscCall(PetscFree3(w0, w1, w2)); 7933 /* 7934 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7935 */ 7936 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7937 o_nnz[jj] = 0; 7938 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7939 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7940 o_nnz[jj] += ncols; 7941 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7942 } 7943 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7944 } 7945 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7946 /* get scalar copy (norms) of matrix */ 7947 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7948 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7949 PetscCall(PetscFree2(d_nnz, o_nnz)); 7950 for (Ii = Istart; Ii < Iend; Ii++) { 7951 PetscInt dest_row = Ii / bs; 7952 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7953 for (jj = 0; jj < ncols; jj++) { 7954 PetscInt dest_col = idx[jj] / bs; 7955 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7956 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7957 } 7958 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7959 } 7960 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7961 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7962 } 7963 } else { 7964 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7965 else { 7966 Gmat = Amat; 7967 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7968 } 7969 if (isseqaij) { 7970 a = Gmat; 7971 b = NULL; 7972 } else { 7973 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7974 a = d->A; 7975 b = d->B; 7976 } 7977 if (filter >= 0 || scale) { 7978 /* take absolute value of each entry */ 7979 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7980 MatInfo info; 7981 PetscScalar *avals; 7982 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7983 PetscCall(MatSeqAIJGetArray(c, &avals)); 7984 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7985 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7986 } 7987 } 7988 } 7989 if 
(symmetrize) { 7990 PetscBool isset, issym; 7991 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7992 if (!isset || !issym) { 7993 Mat matTrans; 7994 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7995 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7996 PetscCall(MatDestroy(&matTrans)); 7997 } 7998 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7999 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8000 if (scale) { 8001 /* scale c for all diagonal values = 1 or -1 */ 8002 Vec diag; 8003 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8004 PetscCall(MatGetDiagonal(Gmat, diag)); 8005 PetscCall(VecReciprocal(diag)); 8006 PetscCall(VecSqrtAbs(diag)); 8007 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8008 PetscCall(VecDestroy(&diag)); 8009 } 8010 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8011 8012 if (filter >= 0) { 8013 Mat Fmat = NULL; /* some silly compiler needs this */ 8014 8015 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 8016 PetscCall(MatDestroy(&Gmat)); 8017 Gmat = Fmat; 8018 } 8019 *a_Gmat = Gmat; 8020 PetscFunctionReturn(PETSC_SUCCESS); 8021 } 8022 8023 /* 8024 Special version for direct calls from Fortran 8025 */ 8026 #include <petsc/private/fortranimpl.h> 8027 8028 /* Change these macros so can be used in void function */ 8029 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8030 #undef PetscCall 8031 #define PetscCall(...) \ 8032 do { \ 8033 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8034 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8035 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8036 return; \ 8037 } \ 8038 } while (0) 8039 8040 #undef SETERRQ 8041 #define SETERRQ(comm, ierr, ...) \ 8042 do { \ 8043 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8044 return; \ 8045 } while (0) 8046 8047 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8048 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8049 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8050 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8051 #else 8052 #endif 8053 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8054 { 8055 Mat mat = *mmat; 8056 PetscInt m = *mm, n = *mn; 8057 InsertMode addv = *maddv; 8058 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8059 PetscScalar value; 8060 8061 MatCheckPreallocated(mat, 1); 8062 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8063 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8064 { 8065 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8066 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8067 PetscBool roworiented = aij->roworiented; 8068 8069 /* Some Variables required in the macro */ 8070 Mat A = aij->A; 8071 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8072 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8073 MatScalar *aa; 8074 PetscBool ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 8075 Mat B = aij->B; 8076 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8077 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8078 MatScalar *ba; 8079 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8080 * cannot use "#if defined" inside a macro. */ 8081 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8082 8083 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8084 PetscInt nonew = a->nonew; 8085 MatScalar *ap1, *ap2; 8086 8087 PetscFunctionBegin; 8088 PetscCall(MatSeqAIJGetArray(A, &aa)); 8089 PetscCall(MatSeqAIJGetArray(B, &ba)); 8090 for (i = 0; i < m; i++) { 8091 if (im[i] < 0) continue; 8092 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8093 if (im[i] >= rstart && im[i] < rend) { 8094 row = im[i] - rstart; 8095 lastcol1 = -1; 8096 rp1 = aj + ai[row]; 8097 ap1 = aa + ai[row]; 8098 rmax1 = aimax[row]; 8099 nrow1 = ailen[row]; 8100 low1 = 0; 8101 high1 = nrow1; 8102 lastcol2 = -1; 8103 rp2 = bj + bi[row]; 8104 ap2 = ba + bi[row]; 8105 rmax2 = bimax[row]; 8106 nrow2 = bilen[row]; 8107 low2 = 0; 8108 high2 = nrow2; 8109 8110 for (j = 0; j < n; j++) { 8111 if (roworiented) value = v[i * n + j]; 8112 else value = v[i + j * m]; 8113 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8114 if (in[j] >= cstart && in[j] < cend) { 8115 col = in[j] - cstart; 8116 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8117 } else if (in[j] < 0) continue; 8118 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8119 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8120 } else { 8121 if (mat->was_assembled) { 8122 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8123 #if defined(PETSC_USE_CTABLE) 8124 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8125 col--; 8126 #else 8127 col = aij->colmap[in[j]] - 1; 8128 #endif 8129 if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 8130 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8131 col = in[j]; 8132 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8133 B = aij->B; 8134 b = (Mat_SeqAIJ *)B->data; 8135 bimax = b->imax; 8136 bi = b->i; 8137 bilen = b->ilen; 8138 bj = b->j; 8139 rp2 = bj + bi[row]; 8140 ap2 = ba + bi[row]; 8141 rmax2 = bimax[row]; 8142 nrow2 = bilen[row]; 8143 low2 = 0; 8144 high2 = nrow2; 8145 bm = aij->B->rmap->n; 8146 ba = b->a; 8147 inserted = PETSC_FALSE; 8148 } 8149 } else col = in[j]; 8150 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8151 } 8152 } 8153 } else if (!aij->donotstash) { 8154 if (roworiented) { 8155 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8156 } else { 8157 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8158 } 8159 } 8160 } 8161 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8162 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8163 } 8164 PetscFunctionReturnVoid(); 8165 } 8166 8167 /* Undefining these here since they were redefined from their original definition above! 
No 8168 * other PETSc functions should be defined past this point, as it is impossible to recover the 8169 * original definitions */ 8170 #undef PetscCall 8171 #undef SETERRQ 8172