1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 #if defined(PETSC_USE_LOG) 15 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 16 #endif 17 PetscCall(MatStashDestroy_Private(&mat->stash)); 18 PetscCall(VecDestroy(&aij->diag)); 19 PetscCall(MatDestroy(&aij->A)); 20 PetscCall(MatDestroy(&aij->B)); 21 #if defined(PETSC_USE_CTABLE) 22 PetscCall(PetscHMapIDestroy(&aij->colmap)); 23 #else 24 PetscCall(PetscFree(aij->colmap)); 25 #endif 26 PetscCall(PetscFree(aij->garray)); 27 PetscCall(VecDestroy(&aij->lvec)); 28 PetscCall(VecScatterDestroy(&aij->Mvctx)); 29 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 30 PetscCall(PetscFree(aij->ld)); 31 32 /* Free COO */ 33 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 34 35 PetscCall(PetscFree(mat->data)); 36 37 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 38 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 39 40 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 45 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 47 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 48 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 50 #if defined(PETSC_HAVE_CUDA) 51 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 52 #endif 53 #if defined(PETSC_HAVE_HIP) 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 55 #endif 56 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 57 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 58 #endif 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 60 #if defined(PETSC_HAVE_ELEMENTAL) 61 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 62 #endif 63 #if defined(PETSC_HAVE_SCALAPACK) 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 65 #endif 66 #if defined(PETSC_HAVE_HYPRE) 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 69 #endif 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 72 
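  /* Note (added comment): composing NULL over a name removes the method that was composed on the matrix at
     creation or conversion time, so the object is left with no dangling function pointers before its data is freed. */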
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity (a short sketch follows this manual page).

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`;
   the type also automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/
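/*
   A minimal usage sketch for the preallocation advice above (not part of the original source; comm, M, N,
   and the per-row estimates 5 and 2 are placeholder values). Calling both preallocation routines is safe
   because each one is a no-op unless the matrix has the corresponding (sequential or parallel) type.

     Mat A;
     PetscCall(MatCreate(comm, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
     PetscCall(MatSetType(A, MATAIJ));
     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
*/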
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
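  /* ISCreateGeneral() below is given ownership of rows[] (PETSC_OWN_POINTER), so the array must not be freed here */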
PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 251 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 252 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 253 PetscFunctionReturn(PETSC_SUCCESS); 254 } 255 256 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 257 { 258 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 259 PetscBool cong; 260 261 PetscFunctionBegin; 262 PetscCall(MatHasCongruentLayouts(Y, &cong)); 263 if (Y->assembled && cong) { 264 PetscCall(MatDiagonalSet(aij->A, D, is)); 265 } else { 266 PetscCall(MatDiagonalSet_Default(Y, D, is)); 267 } 268 PetscFunctionReturn(PETSC_SUCCESS); 269 } 270 271 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 272 { 273 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 274 PetscInt i, rstart, nrows, *rows; 275 276 PetscFunctionBegin; 277 *zrows = NULL; 278 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 279 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 280 for (i = 0; i < nrows; i++) rows[i] += rstart; 281 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 282 PetscFunctionReturn(PETSC_SUCCESS); 283 } 284 285 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 288 PetscInt i, m, n, *garray = aij->garray; 289 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 290 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 291 PetscReal *work; 292 const PetscScalar *dummy; 293 294 PetscFunctionBegin; 295 PetscCall(MatGetSize(A, &m, &n)); 296 PetscCall(PetscCalloc1(n, &work)); 297 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 298 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 299 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 300 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 301 if (type == NORM_2) { 302 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 303 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 304 } else if (type == NORM_1) { 305 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 306 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 307 } else if (type == NORM_INFINITY) { 308 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 309 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 310 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 311 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 312 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 313 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 314 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 315 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 316 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 317 if (type == 
NORM_INFINITY) { 318 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 319 } else { 320 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 321 } 322 PetscCall(PetscFree(work)); 323 if (type == NORM_2) { 324 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 325 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 326 for (i = 0; i < n; i++) reductions[i] /= m; 327 } 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 332 { 333 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 334 IS sis, gis; 335 const PetscInt *isis, *igis; 336 PetscInt n, *iis, nsis, ngis, rstart, i; 337 338 PetscFunctionBegin; 339 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 340 PetscCall(MatFindNonzeroRows(a->B, &gis)); 341 PetscCall(ISGetSize(gis, &ngis)); 342 PetscCall(ISGetSize(sis, &nsis)); 343 PetscCall(ISGetIndices(sis, &isis)); 344 PetscCall(ISGetIndices(gis, &igis)); 345 346 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 347 PetscCall(PetscArraycpy(iis, igis, ngis)); 348 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 349 n = ngis + nsis; 350 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 351 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 352 for (i = 0; i < n; i++) iis[i] += rstart; 353 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 354 355 PetscCall(ISRestoreIndices(sis, &isis)); 356 PetscCall(ISRestoreIndices(gis, &igis)); 357 PetscCall(ISDestroy(&sis)); 358 PetscCall(ISDestroy(&gis)); 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 /* 363 Local utility routine that creates a mapping from the global column 364 number to the local number in the off-diagonal part of the local 365 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 366 a slightly higher hash table cost; without it it is not scalable (each processor 367 has an order N integer array but is fast to access. 
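   For example, with garray = {3, 8, 20} the mapping sends global column 3 to local column 0, 8 to 1 and 20 to 2;
   the stored values are shifted by one so that a lookup that returns 0 means "this global column has no entry in
   the off-diagonal block on this process".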
368 */ 369 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 370 { 371 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 372 PetscInt n = aij->B->cmap->n, i; 373 374 PetscFunctionBegin; 375 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 376 #if defined(PETSC_USE_CTABLE) 377 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 378 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 379 #else 380 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 381 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 382 #endif 383 PetscFunctionReturn(PETSC_SUCCESS); 384 } 385 386 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 387 { \ 388 if (col <= lastcol1) low1 = 0; \ 389 else high1 = nrow1; \ 390 lastcol1 = col; \ 391 while (high1 - low1 > 5) { \ 392 t = (low1 + high1) / 2; \ 393 if (rp1[t] > col) high1 = t; \ 394 else low1 = t; \ 395 } \ 396 for (_i = low1; _i < high1; _i++) { \ 397 if (rp1[_i] > col) break; \ 398 if (rp1[_i] == col) { \ 399 if (addv == ADD_VALUES) { \ 400 ap1[_i] += value; \ 401 /* Not sure LogFlops will slow dow the code or not */ \ 402 (void)PetscLogFlops(1.0); \ 403 } else ap1[_i] = value; \ 404 goto a_noinsert; \ 405 } \ 406 } \ 407 if (value == 0.0 && ignorezeroentries && row != col) { \ 408 low1 = 0; \ 409 high1 = nrow1; \ 410 goto a_noinsert; \ 411 } \ 412 if (nonew == 1) { \ 413 low1 = 0; \ 414 high1 = nrow1; \ 415 goto a_noinsert; \ 416 } \ 417 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 418 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 419 N = nrow1++ - 1; \ 420 a->nz++; \ 421 high1++; \ 422 /* shift up all the later entries in this row */ \ 423 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 424 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 425 rp1[_i] = col; \ 426 ap1[_i] = value; \ 427 A->nonzerostate++; \ 428 a_noinsert:; \ 429 ailen[row] = nrow1; \ 430 } 431 432 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 433 { \ 434 if (col <= lastcol2) low2 = 0; \ 435 else high2 = nrow2; \ 436 lastcol2 = col; \ 437 while (high2 - low2 > 5) { \ 438 t = (low2 + high2) / 2; \ 439 if (rp2[t] > col) high2 = t; \ 440 else low2 = t; \ 441 } \ 442 for (_i = low2; _i < high2; _i++) { \ 443 if (rp2[_i] > col) break; \ 444 if (rp2[_i] == col) { \ 445 if (addv == ADD_VALUES) { \ 446 ap2[_i] += value; \ 447 (void)PetscLogFlops(1.0); \ 448 } else ap2[_i] = value; \ 449 goto b_noinsert; \ 450 } \ 451 } \ 452 if (value == 0.0 && ignorezeroentries) { \ 453 low2 = 0; \ 454 high2 = nrow2; \ 455 goto b_noinsert; \ 456 } \ 457 if (nonew == 1) { \ 458 low2 = 0; \ 459 high2 = nrow2; \ 460 goto b_noinsert; \ 461 } \ 462 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 463 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 464 N = nrow2++ - 1; \ 465 b->nz++; \ 466 high2++; \ 467 /* shift up all the later entries in this row */ \ 468 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 469 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 470 rp2[_i] = col; \ 471 ap2[_i] = value; \ 472 
B->nonzerostate++; \ 473 b_noinsert:; \ 474 bilen[row] = nrow2; \ 475 } 476 477 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 478 { 479 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 480 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 481 PetscInt l, *garray = mat->garray, diag; 482 PetscScalar *aa, *ba; 483 484 PetscFunctionBegin; 485 /* code only works for square matrices A */ 486 487 /* find size of row to the left of the diagonal part */ 488 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 489 row = row - diag; 490 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 491 if (garray[b->j[b->i[row] + l]] > diag) break; 492 } 493 if (l) { 494 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 495 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 496 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 497 } 498 499 /* diagonal part */ 500 if (a->i[row + 1] - a->i[row]) { 501 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 502 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 503 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 504 } 505 506 /* right of diagonal part */ 507 if (b->i[row + 1] - b->i[row] - l) { 508 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 509 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 510 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 511 } 512 PetscFunctionReturn(PETSC_SUCCESS); 513 } 514 515 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 516 { 517 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 518 PetscScalar value = 0.0; 519 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 520 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 521 PetscBool roworiented = aij->roworiented; 522 523 /* Some Variables required in the macro */ 524 Mat A = aij->A; 525 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 526 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 527 PetscBool ignorezeroentries = a->ignorezeroentries; 528 Mat B = aij->B; 529 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 530 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 531 MatScalar *aa, *ba; 532 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 533 PetscInt nonew; 534 MatScalar *ap1, *ap2; 535 536 PetscFunctionBegin; 537 PetscCall(MatSeqAIJGetArray(A, &aa)); 538 PetscCall(MatSeqAIJGetArray(B, &ba)); 539 for (i = 0; i < m; i++) { 540 if (im[i] < 0) continue; 541 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 542 if (im[i] >= rstart && im[i] < rend) { 543 row = im[i] - rstart; 544 lastcol1 = -1; 545 rp1 = aj + ai[row]; 546 ap1 = aa + ai[row]; 547 rmax1 = aimax[row]; 548 nrow1 = ailen[row]; 549 low1 = 0; 550 high1 = nrow1; 551 lastcol2 = -1; 552 rp2 = bj + bi[row]; 553 ap2 = ba + bi[row]; 554 rmax2 = bimax[row]; 555 nrow2 = bilen[row]; 556 low2 = 0; 557 high2 = nrow2; 558 559 for (j = 0; j < n; j++) { 560 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m]; 561 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 562 if (in[j] >= cstart && in[j] < cend) { 563 col = in[j] - cstart; 564 nonew = a->nonew; 565 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 566 } else if (in[j] < 0) { 567 continue; 568 } else { 569 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 570 if (mat->was_assembled) { 571 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 572 #if defined(PETSC_USE_CTABLE) 573 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 574 col--; 575 #else 576 col = aij->colmap[in[j]] - 1; 577 #endif 578 if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 579 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 580 col = in[j]; 581 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 582 B = aij->B; 583 b = (Mat_SeqAIJ *)B->data; 584 bimax = b->imax; 585 bi = b->i; 586 bilen = b->ilen; 587 bj = b->j; 588 ba = b->a; 589 rp2 = bj + bi[row]; 590 ap2 = ba + bi[row]; 591 rmax2 = bimax[row]; 592 nrow2 = bilen[row]; 593 low2 = 0; 594 high2 = nrow2; 595 bm = aij->B->rmap->n; 596 ba = b->a; 597 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 598 if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) { 599 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 600 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 601 } 602 } else col = in[j]; 603 nonew = b->nonew; 604 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 605 } 606 } 607 } else { 608 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 609 if (!aij->donotstash) { 610 mat->assembled = PETSC_FALSE; 611 if (roworiented) { 612 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 613 } else { 614 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 615 } 616 } 617 } 618 } 619 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 620 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 621 PetscFunctionReturn(PETSC_SUCCESS); 622 } 623 624 /* 625 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 626 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 627 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
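   A column index is kept in the diagonal block when it lies in [cstart, cend), i.e. in this process's column
   ownership range; all other column indices go to the off-diagonal block and keep their global numbering.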
628 */ 629 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 630 { 631 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 632 Mat A = aij->A; /* diagonal part of the matrix */ 633 Mat B = aij->B; /* offdiagonal part of the matrix */ 634 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 635 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 636 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 637 PetscInt *ailen = a->ilen, *aj = a->j; 638 PetscInt *bilen = b->ilen, *bj = b->j; 639 PetscInt am = aij->A->rmap->n, j; 640 PetscInt diag_so_far = 0, dnz; 641 PetscInt offd_so_far = 0, onz; 642 643 PetscFunctionBegin; 644 /* Iterate over all rows of the matrix */ 645 for (j = 0; j < am; j++) { 646 dnz = onz = 0; 647 /* Iterate over all non-zero columns of the current row */ 648 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 649 /* If column is in the diagonal */ 650 if (mat_j[col] >= cstart && mat_j[col] < cend) { 651 aj[diag_so_far++] = mat_j[col] - cstart; 652 dnz++; 653 } else { /* off-diagonal entries */ 654 bj[offd_so_far++] = mat_j[col]; 655 onz++; 656 } 657 } 658 ailen[j] = dnz; 659 bilen[j] = onz; 660 } 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 663 664 /* 665 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 666 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 667 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 668 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 669 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 670 */ 671 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 672 { 673 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 674 Mat A = aij->A; /* diagonal part of the matrix */ 675 Mat B = aij->B; /* offdiagonal part of the matrix */ 676 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data; 677 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 678 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 679 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 680 PetscInt *ailen = a->ilen, *aj = a->j; 681 PetscInt *bilen = b->ilen, *bj = b->j; 682 PetscInt am = aij->A->rmap->n, j; 683 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 684 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 685 PetscScalar *aa = a->a, *ba = b->a; 686 687 PetscFunctionBegin; 688 /* Iterate over all rows of the matrix */ 689 for (j = 0; j < am; j++) { 690 dnz_row = onz_row = 0; 691 rowstart_offd = full_offd_i[j]; 692 rowstart_diag = full_diag_i[j]; 693 /* Iterate over all non-zero columns of the current row */ 694 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 695 /* If column is in the diagonal */ 696 if (mat_j[col] >= cstart && mat_j[col] < cend) { 697 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 698 aa[rowstart_diag + dnz_row] = mat_a[col]; 699 dnz_row++; 700 } else { /* off-diagonal entries */ 701 bj[rowstart_offd + onz_row] = mat_j[col]; 702 ba[rowstart_offd + onz_row] = mat_a[col]; 703 onz_row++; 704 } 705 } 706 ailen[j] = dnz_row; 707 bilen[j] = onz_row; 708 } 709 PetscFunctionReturn(PETSC_SUCCESS); 710 } 711 712 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 713 { 714 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 715 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 716 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 717 718 PetscFunctionBegin; 719 for (i = 0; i < m; i++) { 720 if (idxm[i] < 0) continue; /* negative row */ 721 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 722 if (idxm[i] >= rstart && idxm[i] < rend) { 723 row = idxm[i] - rstart; 724 for (j = 0; j < n; j++) { 725 if (idxn[j] < 0) continue; /* negative column */ 726 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 727 if (idxn[j] >= cstart && idxn[j] < cend) { 728 col = idxn[j] - cstart; 729 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 730 } else { 731 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 732 #if defined(PETSC_USE_CTABLE) 733 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 734 col--; 735 #else 736 col = aij->colmap[idxn[j]] - 1; 737 #endif 738 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 739 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 740 } 741 } 742 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 743 } 744 PetscFunctionReturn(PETSC_SUCCESS); 745 } 746 747 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 748 { 749 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 750 PetscInt nstash, reallocs; 751 752 PetscFunctionBegin; 753 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 754 755 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 756 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 757 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 758 PetscFunctionReturn(PETSC_SUCCESS); 759 } 760 761 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 762 { 763 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 764 PetscMPIInt n; 765 PetscInt i, j, rstart, ncols, flg; 766 PetscInt *row, *col; 767 PetscBool other_disassembled; 768 PetscScalar *val; 769 770 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 771 772 PetscFunctionBegin; 773 if 
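  /* Receive the values that other ranks stashed for rows owned here during MatAssemblyBegin(); consecutive
     stash entries belonging to the same row are grouped and inserted with a single MatSetValues_MPIAIJ() call. */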
(!aij->donotstash && !mat->nooffprocentries) { 774 while (1) { 775 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 776 if (!flg) break; 777 778 for (i = 0; i < n;) { 779 /* Now identify the consecutive vals belonging to the same row */ 780 for (j = i, rstart = row[j]; j < n; j++) { 781 if (row[j] != rstart) break; 782 } 783 if (j < n) ncols = j - i; 784 else ncols = n - i; 785 /* Now assemble all these values with a single function call */ 786 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 787 i = j; 788 } 789 } 790 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 791 } 792 #if defined(PETSC_HAVE_DEVICE) 793 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 794 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 795 if (mat->boundtocpu) { 796 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 797 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 798 } 799 #endif 800 PetscCall(MatAssemblyBegin(aij->A, mode)); 801 PetscCall(MatAssemblyEnd(aij->A, mode)); 802 803 /* determine if any processor has disassembled, if so we must 804 also disassemble ourself, in order that we may reassemble. */ 805 /* 806 if nonzero structure of submatrix B cannot change then we know that 807 no processor disassembled thus we can skip this stuff 808 */ 809 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 810 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 811 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 812 PetscCall(MatDisAssemble_MPIAIJ(mat)); 813 } 814 } 815 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 816 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 817 #if defined(PETSC_HAVE_DEVICE) 818 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 819 #endif 820 PetscCall(MatAssemblyBegin(aij->B, mode)); 821 PetscCall(MatAssemblyEnd(aij->B, mode)); 822 823 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 824 825 aij->rowvalues = NULL; 826 827 PetscCall(VecDestroy(&aij->diag)); 828 829 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 830 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 831 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 832 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 833 } 834 #if defined(PETSC_HAVE_DEVICE) 835 mat->offloadmask = PETSC_OFFLOAD_BOTH; 836 #endif 837 PetscFunctionReturn(PETSC_SUCCESS); 838 } 839 840 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 841 { 842 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 843 844 PetscFunctionBegin; 845 PetscCall(MatZeroEntries(l->A)); 846 PetscCall(MatZeroEntries(l->B)); 847 PetscFunctionReturn(PETSC_SUCCESS); 848 } 849 850 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 851 { 852 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 853 PetscObjectState sA, sB; 854 PetscInt *lrows; 855 PetscInt r, len; 856 PetscBool cong, lch, gch; 857 858 PetscFunctionBegin; 859 /* get locally owned rows */ 860 
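  /* (MatZeroRowsMapLocal_Private() translates the global indices in rows[] into local row indices lrows[],
     which it allocates; lrows is freed once the rows have been zeroed below) */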
PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 861 PetscCall(MatHasCongruentLayouts(A, &cong)); 862 /* fix right hand side if needed */ 863 if (x && b) { 864 const PetscScalar *xx; 865 PetscScalar *bb; 866 867 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 868 PetscCall(VecGetArrayRead(x, &xx)); 869 PetscCall(VecGetArray(b, &bb)); 870 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 871 PetscCall(VecRestoreArrayRead(x, &xx)); 872 PetscCall(VecRestoreArray(b, &bb)); 873 } 874 875 sA = mat->A->nonzerostate; 876 sB = mat->B->nonzerostate; 877 878 if (diag != 0.0 && cong) { 879 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 880 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 881 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 882 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 883 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 884 PetscInt nnwA, nnwB; 885 PetscBool nnzA, nnzB; 886 887 nnwA = aijA->nonew; 888 nnwB = aijB->nonew; 889 nnzA = aijA->keepnonzeropattern; 890 nnzB = aijB->keepnonzeropattern; 891 if (!nnzA) { 892 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 893 aijA->nonew = 0; 894 } 895 if (!nnzB) { 896 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 897 aijB->nonew = 0; 898 } 899 /* Must zero here before the next loop */ 900 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 901 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 902 for (r = 0; r < len; ++r) { 903 const PetscInt row = lrows[r] + A->rmap->rstart; 904 if (row >= A->cmap->N) continue; 905 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 906 } 907 aijA->nonew = nnwA; 908 aijB->nonew = nnwB; 909 } else { 910 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 911 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 912 } 913 PetscCall(PetscFree(lrows)); 914 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 915 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 916 917 /* reduce nonzerostate */ 918 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 919 PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 920 if (gch) A->nonzerostate++; 921 PetscFunctionReturn(PETSC_SUCCESS); 922 } 923 924 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 925 { 926 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 927 PetscMPIInt n = A->rmap->n; 928 PetscInt i, j, r, m, len = 0; 929 PetscInt *lrows, *owners = A->rmap->range; 930 PetscMPIInt p = 0; 931 PetscSFNode *rrows; 932 PetscSF sf; 933 const PetscScalar *xx; 934 PetscScalar *bb, *mask, *aij_a; 935 Vec xmask, lmask; 936 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 937 const PetscInt *aj, *ii, *ridx; 938 PetscScalar *aa; 939 940 PetscFunctionBegin; 941 /* Create SF where leaves are input rows and roots are owned rows */ 942 PetscCall(PetscMalloc1(n, &lrows)); 943 for (r = 0; r < n; ++r) lrows[r] = -1; 944 PetscCall(PetscMalloc1(N, &rrows)); 945 for (r = 0; r < N; ++r) { 946 const PetscInt idx = rows[r]; 947 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range 
[0,%" PetscInt_FMT ")", idx, A->rmap->N); 948 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 949 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 950 } 951 rrows[r].rank = p; 952 rrows[r].index = rows[r] - owners[p]; 953 } 954 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 955 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 956 /* Collect flags for rows to be zeroed */ 957 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 958 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 959 PetscCall(PetscSFDestroy(&sf)); 960 /* Compress and put in row numbers */ 961 for (r = 0; r < n; ++r) 962 if (lrows[r] >= 0) lrows[len++] = r; 963 /* zero diagonal part of matrix */ 964 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 965 /* handle off diagonal part of matrix */ 966 PetscCall(MatCreateVecs(A, &xmask, NULL)); 967 PetscCall(VecDuplicate(l->lvec, &lmask)); 968 PetscCall(VecGetArray(xmask, &bb)); 969 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 970 PetscCall(VecRestoreArray(xmask, &bb)); 971 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 972 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 973 PetscCall(VecDestroy(&xmask)); 974 if (x && b) { /* this code is buggy when the row and column layout don't match */ 975 PetscBool cong; 976 977 PetscCall(MatHasCongruentLayouts(A, &cong)); 978 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 979 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 980 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 981 PetscCall(VecGetArrayRead(l->lvec, &xx)); 982 PetscCall(VecGetArray(b, &bb)); 983 } 984 PetscCall(VecGetArray(lmask, &mask)); 985 /* remove zeroed rows of off diagonal matrix */ 986 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 987 ii = aij->i; 988 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]])); 989 /* loop over all elements of off process part of matrix zeroing removed columns*/ 990 if (aij->compressedrow.use) { 991 m = aij->compressedrow.nrows; 992 ii = aij->compressedrow.i; 993 ridx = aij->compressedrow.rindex; 994 for (i = 0; i < m; i++) { 995 n = ii[i + 1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij_a + ii[i]; 998 999 for (j = 0; j < n; j++) { 1000 if (PetscAbsScalar(mask[*aj])) { 1001 if (b) bb[*ridx] -= *aa * xx[*aj]; 1002 *aa = 0.0; 1003 } 1004 aa++; 1005 aj++; 1006 } 1007 ridx++; 1008 } 1009 } else { /* do not use compressed row format */ 1010 m = l->B->rmap->n; 1011 for (i = 0; i < m; i++) { 1012 n = ii[i + 1] - ii[i]; 1013 aj = aij->j + ii[i]; 1014 aa = aij_a + ii[i]; 1015 for (j = 0; j < n; j++) { 1016 if (PetscAbsScalar(mask[*aj])) { 1017 if (b) bb[i] -= *aa * xx[*aj]; 1018 *aa = 0.0; 1019 } 1020 aa++; 1021 aj++; 1022 } 1023 } 1024 } 1025 if (x && b) { 1026 PetscCall(VecRestoreArray(b, &bb)); 1027 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1028 } 1029 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1030 PetscCall(VecRestoreArray(lmask, &mask)); 1031 PetscCall(VecDestroy(&lmask)); 1032 PetscCall(PetscFree(lrows)); 1033 1034 /* only change matrix nonzero state if pattern was allowed to be changed */ 1035 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 1036 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 
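    /* sum the local nonzero states of the diagonal and off-diagonal blocks over all ranks; the reduced value
       changes whenever any process changed its nonzero pattern */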
1037 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1038 } 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } 1041 1042 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1045 PetscInt nt; 1046 VecScatter Mvctx = a->Mvctx; 1047 1048 PetscFunctionBegin; 1049 PetscCall(VecGetLocalSize(xx, &nt)); 1050 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1051 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1052 PetscUseTypeMethod(a->A, mult, xx, yy); 1053 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1054 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1055 PetscFunctionReturn(PETSC_SUCCESS); 1056 } 1057 1058 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1059 { 1060 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1061 1062 PetscFunctionBegin; 1063 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1064 PetscFunctionReturn(PETSC_SUCCESS); 1065 } 1066 1067 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1070 VecScatter Mvctx = a->Mvctx; 1071 1072 PetscFunctionBegin; 1073 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1074 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1075 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1076 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1077 PetscFunctionReturn(PETSC_SUCCESS); 1078 } 1079 1080 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1083 1084 PetscFunctionBegin; 1085 /* do nondiagonal part */ 1086 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1087 /* do local part */ 1088 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1089 /* add partial results together */ 1090 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1091 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1092 PetscFunctionReturn(PETSC_SUCCESS); 1093 } 1094 1095 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1096 { 1097 MPI_Comm comm; 1098 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1099 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1100 IS Me, Notme; 1101 PetscInt M, N, first, last, *notme, i; 1102 PetscBool lf; 1103 PetscMPIInt size; 1104 1105 PetscFunctionBegin; 1106 /* Easy test: symmetric diagonal block */ 1107 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1108 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1109 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1110 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1111 PetscCallMPI(MPI_Comm_size(comm, &size)); 1112 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1113 1114 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
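     Each process extracts the block A(Me, Notme) of its owned rows against all off-process indices, and the
     matching block B(Notme, Me), then checks entrywise that one is the transpose of the other.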
*/ 1115 PetscCall(MatGetSize(Amat, &M, &N)); 1116 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1117 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1118 for (i = 0; i < first; i++) notme[i] = i; 1119 for (i = last; i < M; i++) notme[i - last + first] = i; 1120 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1121 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1122 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1123 Aoff = Aoffs[0]; 1124 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1125 Boff = Boffs[0]; 1126 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1127 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1128 PetscCall(MatDestroyMatrices(1, &Boffs)); 1129 PetscCall(ISDestroy(&Me)); 1130 PetscCall(ISDestroy(&Notme)); 1131 PetscCall(PetscFree(notme)); 1132 PetscFunctionReturn(PETSC_SUCCESS); 1133 } 1134 1135 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1136 { 1137 PetscFunctionBegin; 1138 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1139 PetscFunctionReturn(PETSC_SUCCESS); 1140 } 1141 1142 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1143 { 1144 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1145 1146 PetscFunctionBegin; 1147 /* do nondiagonal part */ 1148 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1149 /* do local part */ 1150 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1151 /* add partial results together */ 1152 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1153 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1154 PetscFunctionReturn(PETSC_SUCCESS); 1155 } 1156 1157 /* 1158 This only works correctly for square matrices where the subblock A->A is the 1159 diagonal block 1160 */ 1161 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1162 { 1163 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1164 1165 PetscFunctionBegin; 1166 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1167 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1168 PetscCall(MatGetDiagonal(a->A, v)); 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1173 { 1174 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1175 1176 PetscFunctionBegin; 1177 PetscCall(MatScale(a->A, aa)); 1178 PetscCall(MatScale(a->B, aa)); 1179 PetscFunctionReturn(PETSC_SUCCESS); 1180 } 1181 1182 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1183 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1184 { 1185 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1186 1187 PetscFunctionBegin; 1188 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1189 PetscCall(PetscFree(aij->Aperm1)); 1190 PetscCall(PetscFree(aij->Bperm1)); 1191 PetscCall(PetscFree(aij->Ajmap1)); 1192 PetscCall(PetscFree(aij->Bjmap1)); 1193 1194 PetscCall(PetscFree(aij->Aimap2)); 1195 PetscCall(PetscFree(aij->Bimap2)); 1196 PetscCall(PetscFree(aij->Aperm2)); 1197 PetscCall(PetscFree(aij->Bperm2)); 1198 PetscCall(PetscFree(aij->Ajmap2)); 1199 PetscCall(PetscFree(aij->Bjmap2)); 1200 1201 PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf)); 1202 PetscCall(PetscFree(aij->Cperm1)); 1203 PetscFunctionReturn(PETSC_SUCCESS); 
1204 } 1205 1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1207 { 1208 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1209 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1210 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1211 const PetscInt *garray = aij->garray; 1212 const PetscScalar *aa, *ba; 1213 PetscInt header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb; 1214 PetscInt *rowlens; 1215 PetscInt *colidxs; 1216 PetscScalar *matvals; 1217 1218 PetscFunctionBegin; 1219 PetscCall(PetscViewerSetUp(viewer)); 1220 1221 M = mat->rmap->N; 1222 N = mat->cmap->N; 1223 m = mat->rmap->n; 1224 rs = mat->rmap->rstart; 1225 cs = mat->cmap->rstart; 1226 nz = A->nz + B->nz; 1227 1228 /* write matrix header */ 1229 header[0] = MAT_FILE_CLASSID; 1230 header[1] = M; 1231 header[2] = N; 1232 header[3] = nz; 1233 PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1234 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1235 1236 /* fill in and store row lengths */ 1237 PetscCall(PetscMalloc1(m, &rowlens)); 1238 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1239 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1240 PetscCall(PetscFree(rowlens)); 1241 1242 /* fill in and store column indices */ 1243 PetscCall(PetscMalloc1(nz, &colidxs)); 1244 for (cnt = 0, i = 0; i < m; i++) { 1245 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1246 if (garray[B->j[jb]] > cs) break; 1247 colidxs[cnt++] = garray[B->j[jb]]; 1248 } 1249 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1250 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1251 } 1252 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1254 PetscCall(PetscFree(colidxs)); 1255 1256 /* fill in and store nonzero values */ 1257 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1258 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1259 PetscCall(PetscMalloc1(nz, &matvals)); 1260 for (cnt = 0, i = 0; i < m; i++) { 1261 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1262 if (garray[B->j[jb]] > cs) break; 1263 matvals[cnt++] = ba[jb]; 1264 } 1265 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1266 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1267 } 1268 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1269 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1270 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1271 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1272 PetscCall(PetscFree(matvals)); 1273 1274 /* write block size option to the viewer's .info file */ 1275 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 #include <petscdraw.h> 1280 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1281 { 1282 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1283 PetscMPIInt rank = aij->rank, size = aij->size; 1284 PetscBool isdraw, iascii, isbinary; 1285 PetscViewer sviewer; 1286 PetscViewerFormat format; 1287 1288 PetscFunctionBegin; 1289 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1290 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1291 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1292 if (iascii) { 1293 PetscCall(PetscViewerGetFormat(viewer, &format)); 1294 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1295 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1296 PetscCall(PetscMalloc1(size, &nz)); 1297 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1298 for (i = 0; i < (PetscInt)size; i++) { 1299 nmax = PetscMax(nmax, nz[i]); 1300 nmin = PetscMin(nmin, nz[i]); 1301 navg += nz[i]; 1302 } 1303 PetscCall(PetscFree(nz)); 1304 navg = navg / size; 1305 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } 1308 PetscCall(PetscViewerGetFormat(viewer, &format)); 1309 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1310 MatInfo info; 1311 PetscInt *inodes = NULL; 1312 1313 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1314 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1315 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1316 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1317 if (!inodes) { 1318 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1319 (double)info.memory)); 1320 } else { 1321 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1322 (double)info.memory)); 1323 } 1324 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1325 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1326 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1327 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1328 PetscCall(PetscViewerFlush(viewer)); 1329 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1330 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1331 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1332 PetscFunctionReturn(PETSC_SUCCESS); 1333 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1334 PetscInt inodecount, inodelimit, *inodes; 1335 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1336 if (inodes) { 1337 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1338 } else { 1339 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1340 } 1341 PetscFunctionReturn(PETSC_SUCCESS); 1342 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1343 PetscFunctionReturn(PETSC_SUCCESS); 1344 } 1345 } else if (isbinary) { 1346 if (size == 1) { 1347 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1348 PetscCall(MatView(aij->A, viewer)); 1349 } else { 1350 
PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1351 } 1352 PetscFunctionReturn(PETSC_SUCCESS); 1353 } else if (iascii && size == 1) { 1354 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1355 PetscCall(MatView(aij->A, viewer)); 1356 PetscFunctionReturn(PETSC_SUCCESS); 1357 } else if (isdraw) { 1358 PetscDraw draw; 1359 PetscBool isnull; 1360 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1361 PetscCall(PetscDrawIsNull(draw, &isnull)); 1362 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1363 } 1364 1365 { /* assemble the entire matrix onto first processor */ 1366 Mat A = NULL, Av; 1367 IS isrow, iscol; 1368 1369 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1370 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1371 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1372 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1373 /* The commented code uses MatCreateSubMatrices instead */ 1374 /* 1375 Mat *AA, A = NULL, Av; 1376 IS isrow,iscol; 1377 1378 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1379 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1380 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1381 if (rank == 0) { 1382 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1383 A = AA[0]; 1384 Av = AA[0]; 1385 } 1386 PetscCall(MatDestroySubMatrices(1,&AA)); 1387 */ 1388 PetscCall(ISDestroy(&iscol)); 1389 PetscCall(ISDestroy(&isrow)); 1390 /* 1391 Everyone has to call to draw the matrix since the graphics waits are 1392 synchronized across all processors that share the PetscDraw object 1393 */ 1394 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1395 if (rank == 0) { 1396 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1397 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1398 } 1399 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1400 PetscCall(PetscViewerFlush(viewer)); 1401 PetscCall(MatDestroy(&A)); 1402 } 1403 PetscFunctionReturn(PETSC_SUCCESS); 1404 } 1405 1406 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1407 { 1408 PetscBool iascii, isdraw, issocket, isbinary; 1409 1410 PetscFunctionBegin; 1411 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1412 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1413 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1414 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1415 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1416 PetscFunctionReturn(PETSC_SUCCESS); 1417 } 1418 1419 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1420 { 1421 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1422 Vec bb1 = NULL; 1423 PetscBool hasop; 1424 1425 PetscFunctionBegin; 1426 if (flag == SOR_APPLY_UPPER) { 1427 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1428 PetscFunctionReturn(PETSC_SUCCESS); 1429 } 1430 1431 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1432 1433 if 
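  /* Local SOR: each outer iteration scatters the current solution into the ghost vector, updates the right-hand
     side as bb1 = bb - B*lvec (the off-process coupling), and then runs 'lits' sweeps of SOR on the local
     diagonal block A. */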
((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1434 if (flag & SOR_ZERO_INITIAL_GUESS) { 1435 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1436 its--; 1437 } 1438 1439 while (its--) { 1440 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1441 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1442 1443 /* update rhs: bb1 = bb - B*x */ 1444 PetscCall(VecScale(mat->lvec, -1.0)); 1445 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1446 1447 /* local sweep */ 1448 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1449 } 1450 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1451 if (flag & SOR_ZERO_INITIAL_GUESS) { 1452 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1453 its--; 1454 } 1455 while (its--) { 1456 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1457 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1458 1459 /* update rhs: bb1 = bb - B*x */ 1460 PetscCall(VecScale(mat->lvec, -1.0)); 1461 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1462 1463 /* local sweep */ 1464 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1465 } 1466 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1467 if (flag & SOR_ZERO_INITIAL_GUESS) { 1468 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1469 its--; 1470 } 1471 while (its--) { 1472 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1473 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1474 1475 /* update rhs: bb1 = bb - B*x */ 1476 PetscCall(VecScale(mat->lvec, -1.0)); 1477 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1478 1479 /* local sweep */ 1480 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1481 } 1482 } else if (flag & SOR_EISENSTAT) { 1483 Vec xx1; 1484 1485 PetscCall(VecDuplicate(bb, &xx1)); 1486 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1487 1488 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1489 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1490 if (!mat->diag) { 1491 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1492 PetscCall(MatGetDiagonal(matin, mat->diag)); 1493 } 1494 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1495 if (hasop) { 1496 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1497 } else { 1498 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1499 } 1500 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1501 1502 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1503 1504 /* local sweep */ 1505 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1506 PetscCall(VecAXPY(xx, 1.0, xx1)); 1507 PetscCall(VecDestroy(&xx1)); 1508 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1509 1510 PetscCall(VecDestroy(&bb1)); 1511 1512 matin->factorerrortype = mat->A->factorerrortype; 1513 PetscFunctionReturn(PETSC_SUCCESS); 1514 } 1515 1516 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 
1517 { 1518 Mat aA, aB, Aperm; 1519 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1520 PetscScalar *aa, *ba; 1521 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1522 PetscSF rowsf, sf; 1523 IS parcolp = NULL; 1524 PetscBool done; 1525 1526 PetscFunctionBegin; 1527 PetscCall(MatGetLocalSize(A, &m, &n)); 1528 PetscCall(ISGetIndices(rowp, &rwant)); 1529 PetscCall(ISGetIndices(colp, &cwant)); 1530 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1531 1532 /* Invert row permutation to find out where my rows should go */ 1533 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1534 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1535 PetscCall(PetscSFSetFromOptions(rowsf)); 1536 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1537 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1538 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1539 1540 /* Invert column permutation to find out where my columns should go */ 1541 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1542 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1543 PetscCall(PetscSFSetFromOptions(sf)); 1544 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1545 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1546 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1547 PetscCall(PetscSFDestroy(&sf)); 1548 1549 PetscCall(ISRestoreIndices(rowp, &rwant)); 1550 PetscCall(ISRestoreIndices(colp, &cwant)); 1551 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1552 1553 /* Find out where my gcols should go */ 1554 PetscCall(MatGetSize(aB, NULL, &ng)); 1555 PetscCall(PetscMalloc1(ng, &gcdest)); 1556 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1557 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1558 PetscCall(PetscSFSetFromOptions(sf)); 1559 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1560 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1561 PetscCall(PetscSFDestroy(&sf)); 1562 1563 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1564 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1565 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1566 for (i = 0; i < m; i++) { 1567 PetscInt row = rdest[i]; 1568 PetscMPIInt rowner; 1569 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1570 for (j = ai[i]; j < ai[i + 1]; j++) { 1571 PetscInt col = cdest[aj[j]]; 1572 PetscMPIInt cowner; 1573 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1574 if (rowner == cowner) dnnz[i]++; 1575 else onnz[i]++; 1576 } 1577 for (j = bi[i]; j < bi[i + 1]; j++) { 1578 PetscInt col = gcdest[bj[j]]; 1579 PetscMPIInt cowner; 1580 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1581 if (rowner == cowner) dnnz[i]++; 1582 else onnz[i]++; 1583 } 1584 } 1585 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1586 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1587 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1588 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1589 PetscCall(PetscSFDestroy(&rowsf)); 1590 1591 
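/* Create the permuted matrix with the preallocation counts communicated through rowsf, then insert the permuted values one row at a time; dnnz and onnz are no longer needed for counting and are reused below as per-row column index buffers */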
PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1592 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1593 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1594 for (i = 0; i < m; i++) { 1595 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1596 PetscInt j0, rowlen; 1597 rowlen = ai[i + 1] - ai[i]; 1598 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1599 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1600 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1601 } 1602 rowlen = bi[i + 1] - bi[i]; 1603 for (j0 = j = 0; j < rowlen; j0 = j) { 1604 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1605 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1606 } 1607 } 1608 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1609 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1610 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1611 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1612 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1613 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1614 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1615 PetscCall(PetscFree3(work, rdest, cdest)); 1616 PetscCall(PetscFree(gcdest)); 1617 if (parcolp) PetscCall(ISDestroy(&colp)); 1618 *B = Aperm; 1619 PetscFunctionReturn(PETSC_SUCCESS); 1620 } 1621 1622 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1623 { 1624 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1625 1626 PetscFunctionBegin; 1627 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1628 if (ghosts) *ghosts = aij->garray; 1629 PetscFunctionReturn(PETSC_SUCCESS); 1630 } 1631 1632 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1633 { 1634 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1635 Mat A = mat->A, B = mat->B; 1636 PetscLogDouble isend[5], irecv[5]; 1637 1638 PetscFunctionBegin; 1639 info->block_size = 1.0; 1640 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1641 1642 isend[0] = info->nz_used; 1643 isend[1] = info->nz_allocated; 1644 isend[2] = info->nz_unneeded; 1645 isend[3] = info->memory; 1646 isend[4] = info->mallocs; 1647 1648 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1649 1650 isend[0] += info->nz_used; 1651 isend[1] += info->nz_allocated; 1652 isend[2] += info->nz_unneeded; 1653 isend[3] += info->memory; 1654 isend[4] += info->mallocs; 1655 if (flag == MAT_LOCAL) { 1656 info->nz_used = isend[0]; 1657 info->nz_allocated = isend[1]; 1658 info->nz_unneeded = isend[2]; 1659 info->memory = isend[3]; 1660 info->mallocs = isend[4]; 1661 } else if (flag == MAT_GLOBAL_MAX) { 1662 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1663 1664 info->nz_used = irecv[0]; 1665 info->nz_allocated = irecv[1]; 1666 info->nz_unneeded = irecv[2]; 1667 info->memory = irecv[3]; 1668 info->mallocs = irecv[4]; 1669 } else if (flag == MAT_GLOBAL_SUM) { 1670 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1671 1672 info->nz_used = irecv[0]; 1673 info->nz_allocated = irecv[1]; 1674 info->nz_unneeded = irecv[2]; 1675 info->memory = irecv[3]; 1676 info->mallocs = irecv[4]; 1677 } 1678 info->fill_ratio_given = 0; /* no parallel 
LU/ILU/Cholesky */ 1679 info->fill_ratio_needed = 0; 1680 info->factor_mallocs = 0; 1681 PetscFunctionReturn(PETSC_SUCCESS); 1682 } 1683 1684 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1685 { 1686 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1687 1688 PetscFunctionBegin; 1689 switch (op) { 1690 case MAT_NEW_NONZERO_LOCATIONS: 1691 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1692 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1693 case MAT_KEEP_NONZERO_PATTERN: 1694 case MAT_NEW_NONZERO_LOCATION_ERR: 1695 case MAT_USE_INODES: 1696 case MAT_IGNORE_ZERO_ENTRIES: 1697 case MAT_FORM_EXPLICIT_TRANSPOSE: 1698 MatCheckPreallocated(A, 1); 1699 PetscCall(MatSetOption(a->A, op, flg)); 1700 PetscCall(MatSetOption(a->B, op, flg)); 1701 break; 1702 case MAT_ROW_ORIENTED: 1703 MatCheckPreallocated(A, 1); 1704 a->roworiented = flg; 1705 1706 PetscCall(MatSetOption(a->A, op, flg)); 1707 PetscCall(MatSetOption(a->B, op, flg)); 1708 break; 1709 case MAT_FORCE_DIAGONAL_ENTRIES: 1710 case MAT_SORTED_FULL: 1711 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1712 break; 1713 case MAT_IGNORE_OFF_PROC_ENTRIES: 1714 a->donotstash = flg; 1715 break; 1716 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1717 case MAT_SPD: 1718 case MAT_SYMMETRIC: 1719 case MAT_STRUCTURALLY_SYMMETRIC: 1720 case MAT_HERMITIAN: 1721 case MAT_SYMMETRY_ETERNAL: 1722 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1723 case MAT_SPD_ETERNAL: 1724 /* if the diagonal matrix is square it inherits some of the properties above */ 1725 break; 1726 case MAT_SUBMAT_SINGLEIS: 1727 A->submat_singleis = flg; 1728 break; 1729 case MAT_STRUCTURE_ONLY: 1730 /* The option is handled directly by MatSetOption() */ 1731 break; 1732 default: 1733 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1734 } 1735 PetscFunctionReturn(PETSC_SUCCESS); 1736 } 1737 1738 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1739 { 1740 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1741 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1742 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1743 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1744 PetscInt *cmap, *idx_p; 1745 1746 PetscFunctionBegin; 1747 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1748 mat->getrowactive = PETSC_TRUE; 1749 1750 if (!mat->rowvalues && (idx || v)) { 1751 /* 1752 allocate enough space to hold information from the longest row. 
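The longest row is measured over the combined length of each local row in the diagonal (A) and off-diagonal (B) parts, since MatGetRow() returns the merged row.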
1753 */ 1754 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1755 PetscInt max = 1, tmp; 1756 for (i = 0; i < matin->rmap->n; i++) { 1757 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1758 if (max < tmp) max = tmp; 1759 } 1760 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1761 } 1762 1763 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1764 lrow = row - rstart; 1765 1766 pvA = &vworkA; 1767 pcA = &cworkA; 1768 pvB = &vworkB; 1769 pcB = &cworkB; 1770 if (!v) { 1771 pvA = NULL; 1772 pvB = NULL; 1773 } 1774 if (!idx) { 1775 pcA = NULL; 1776 if (!v) pcB = NULL; 1777 } 1778 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1779 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1780 nztot = nzA + nzB; 1781 1782 cmap = mat->garray; 1783 if (v || idx) { 1784 if (nztot) { 1785 /* Sort by increasing column numbers, assuming A and B already sorted */ 1786 PetscInt imark = -1; 1787 if (v) { 1788 *v = v_p = mat->rowvalues; 1789 for (i = 0; i < nzB; i++) { 1790 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1791 else break; 1792 } 1793 imark = i; 1794 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1795 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1796 } 1797 if (idx) { 1798 *idx = idx_p = mat->rowindices; 1799 if (imark > -1) { 1800 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1801 } else { 1802 for (i = 0; i < nzB; i++) { 1803 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1804 else break; 1805 } 1806 imark = i; 1807 } 1808 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1809 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1810 } 1811 } else { 1812 if (idx) *idx = NULL; 1813 if (v) *v = NULL; 1814 } 1815 } 1816 *nz = nztot; 1817 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1818 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1819 PetscFunctionReturn(PETSC_SUCCESS); 1820 } 1821 1822 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1823 { 1824 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1825 1826 PetscFunctionBegin; 1827 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1828 aij->getrowactive = PETSC_FALSE; 1829 PetscFunctionReturn(PETSC_SUCCESS); 1830 } 1831 1832 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1833 { 1834 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1835 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1836 PetscInt i, j, cstart = mat->cmap->rstart; 1837 PetscReal sum = 0.0; 1838 const MatScalar *v, *amata, *bmata; 1839 1840 PetscFunctionBegin; 1841 if (aij->size == 1) { 1842 PetscCall(MatNorm(aij->A, type, norm)); 1843 } else { 1844 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1845 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1846 if (type == NORM_FROBENIUS) { 1847 v = amata; 1848 for (i = 0; i < amat->nz; i++) { 1849 sum += PetscRealPart(PetscConj(*v) * (*v)); 1850 v++; 1851 } 1852 v = bmata; 1853 for (i = 0; i < bmat->nz; i++) { 1854 sum += PetscRealPart(PetscConj(*v) * (*v)); 1855 v++; 1856 } 1857 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1858 *norm = PetscSqrtReal(*norm); 1859 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1860 } else if (type == NORM_1) { /* max column 
norm */ 1861 PetscReal *tmp, *tmp2; 1862 PetscInt *jj, *garray = aij->garray; 1863 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1864 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1865 *norm = 0.0; 1866 v = amata; 1867 jj = amat->j; 1868 for (j = 0; j < amat->nz; j++) { 1869 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1870 v++; 1871 } 1872 v = bmata; 1873 jj = bmat->j; 1874 for (j = 0; j < bmat->nz; j++) { 1875 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1876 v++; 1877 } 1878 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1879 for (j = 0; j < mat->cmap->N; j++) { 1880 if (tmp2[j] > *norm) *norm = tmp2[j]; 1881 } 1882 PetscCall(PetscFree(tmp)); 1883 PetscCall(PetscFree(tmp2)); 1884 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1885 } else if (type == NORM_INFINITY) { /* max row norm */ 1886 PetscReal ntemp = 0.0; 1887 for (j = 0; j < aij->A->rmap->n; j++) { 1888 v = amata + amat->i[j]; 1889 sum = 0.0; 1890 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1891 sum += PetscAbsScalar(*v); 1892 v++; 1893 } 1894 v = bmata + bmat->i[j]; 1895 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1896 sum += PetscAbsScalar(*v); 1897 v++; 1898 } 1899 if (sum > ntemp) ntemp = sum; 1900 } 1901 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1902 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1903 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1904 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1905 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1906 } 1907 PetscFunctionReturn(PETSC_SUCCESS); 1908 } 1909 1910 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1911 { 1912 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1913 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1914 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1915 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1916 Mat B, A_diag, *B_diag; 1917 const MatScalar *pbv, *bv; 1918 1919 PetscFunctionBegin; 1920 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1921 ma = A->rmap->n; 1922 na = A->cmap->n; 1923 mb = a->B->rmap->n; 1924 nb = a->B->cmap->n; 1925 ai = Aloc->i; 1926 aj = Aloc->j; 1927 bi = Bloc->i; 1928 bj = Bloc->j; 1929 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1930 PetscInt *d_nnz, *g_nnz, *o_nnz; 1931 PetscSFNode *oloc; 1932 PETSC_UNUSED PetscSF sf; 1933 1934 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1935 /* compute d_nnz for preallocation */ 1936 PetscCall(PetscArrayzero(d_nnz, na)); 1937 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1938 /* compute local off-diagonal contributions */ 1939 PetscCall(PetscArrayzero(g_nnz, nb)); 1940 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1941 /* map those to global */ 1942 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1943 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1944 PetscCall(PetscSFSetFromOptions(sf)); 1945 PetscCall(PetscArrayzero(o_nnz, na)); 1946 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1947 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1948 PetscCall(PetscSFDestroy(&sf)); 1949 1950 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1951 
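/* the transpose uses the column layout of A for its rows and the row layout of A for its columns */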
PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1952 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1953 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1954 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1955 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1956 } else { 1957 B = *matout; 1958 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1959 } 1960 1961 b = (Mat_MPIAIJ *)B->data; 1962 A_diag = a->A; 1963 B_diag = &b->A; 1964 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1965 A_diag_ncol = A_diag->cmap->N; 1966 B_diag_ilen = sub_B_diag->ilen; 1967 B_diag_i = sub_B_diag->i; 1968 1969 /* Set ilen for diagonal of B */ 1970 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1971 1972 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1973 very quickly (=without using MatSetValues), because all writes are local. */ 1974 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1975 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1976 1977 /* copy over the B part */ 1978 PetscCall(PetscMalloc1(bi[mb], &cols)); 1979 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1980 pbv = bv; 1981 row = A->rmap->rstart; 1982 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1983 cols_tmp = cols; 1984 for (i = 0; i < mb; i++) { 1985 ncol = bi[i + 1] - bi[i]; 1986 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1987 row++; 1988 pbv += ncol; 1989 cols_tmp += ncol; 1990 } 1991 PetscCall(PetscFree(cols)); 1992 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1993 1994 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1995 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1996 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1997 *matout = B; 1998 } else { 1999 PetscCall(MatHeaderMerge(A, &B)); 2000 } 2001 PetscFunctionReturn(PETSC_SUCCESS); 2002 } 2003 2004 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 2005 { 2006 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2007 Mat a = aij->A, b = aij->B; 2008 PetscInt s1, s2, s3; 2009 2010 PetscFunctionBegin; 2011 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 2012 if (rr) { 2013 PetscCall(VecGetLocalSize(rr, &s1)); 2014 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 2015 /* Overlap communication with computation. 
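The scatter of rr into lvec is started here; the diagonal block (and the left scaling of the off-diagonal block) is applied while the message is in flight, and the right scaling of the off-diagonal block waits for VecScatterEnd() below.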
*/ 2016 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2017 } 2018 if (ll) { 2019 PetscCall(VecGetLocalSize(ll, &s1)); 2020 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 2021 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 2022 } 2023 /* scale the diagonal block */ 2024 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2025 2026 if (rr) { 2027 /* Do a scatter end and then right scale the off-diagonal block */ 2028 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2029 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2030 } 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2035 { 2036 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2037 2038 PetscFunctionBegin; 2039 PetscCall(MatSetUnfactored(a->A)); 2040 PetscFunctionReturn(PETSC_SUCCESS); 2041 } 2042 2043 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2044 { 2045 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2046 Mat a, b, c, d; 2047 PetscBool flg; 2048 2049 PetscFunctionBegin; 2050 a = matA->A; 2051 b = matA->B; 2052 c = matB->A; 2053 d = matB->B; 2054 2055 PetscCall(MatEqual(a, c, &flg)); 2056 if (flg) PetscCall(MatEqual(b, d, &flg)); 2057 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2058 PetscFunctionReturn(PETSC_SUCCESS); 2059 } 2060 2061 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2062 { 2063 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2064 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2065 2066 PetscFunctionBegin; 2067 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2068 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2069 /* because of the column compression in the off-processor part of the matrix a->B, 2070 the number of columns in a->B and b->B may be different, hence we cannot call 2071 the MatCopy() directly on the two parts. If need be, we can provide a more 2072 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2073 then copying the submatrices */ 2074 PetscCall(MatCopy_Basic(A, B, str)); 2075 } else { 2076 PetscCall(MatCopy(a->A, b->A, str)); 2077 PetscCall(MatCopy(a->B, b->B, str)); 2078 } 2079 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2080 PetscFunctionReturn(PETSC_SUCCESS); 2081 } 2082 2083 /* 2084 Computes the number of nonzeros per row needed for preallocation when X and Y 2085 have different nonzero structure. 
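For each row, nnz[i] is the size of the union of the global column index sets of row i of X and row i of Y; for example, columns {1,4,7} in X and {2,4,9} in Y give nnz[i] = 5.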
2086 */ 2087 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2088 { 2089 PetscInt i, j, k, nzx, nzy; 2090 2091 PetscFunctionBegin; 2092 /* Set the number of nonzeros in the new matrix */ 2093 for (i = 0; i < m; i++) { 2094 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2095 nzx = xi[i + 1] - xi[i]; 2096 nzy = yi[i + 1] - yi[i]; 2097 nnz[i] = 0; 2098 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2099 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2100 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2101 nnz[i]++; 2102 } 2103 for (; k < nzy; k++) nnz[i]++; 2104 } 2105 PetscFunctionReturn(PETSC_SUCCESS); 2106 } 2107 2108 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2109 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2110 { 2111 PetscInt m = Y->rmap->N; 2112 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2113 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2114 2115 PetscFunctionBegin; 2116 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2117 PetscFunctionReturn(PETSC_SUCCESS); 2118 } 2119 2120 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2121 { 2122 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2123 2124 PetscFunctionBegin; 2125 if (str == SAME_NONZERO_PATTERN) { 2126 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2127 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2128 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2129 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2130 } else { 2131 Mat B; 2132 PetscInt *nnz_d, *nnz_o; 2133 2134 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2135 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2136 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2137 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2138 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2139 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2140 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2141 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2142 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2143 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2144 PetscCall(MatHeaderMerge(Y, &B)); 2145 PetscCall(PetscFree(nnz_d)); 2146 PetscCall(PetscFree(nnz_o)); 2147 } 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2152 2153 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2154 { 2155 PetscFunctionBegin; 2156 if (PetscDefined(USE_COMPLEX)) { 2157 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2158 2159 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2160 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2161 } 2162 PetscFunctionReturn(PETSC_SUCCESS); 2163 } 2164 2165 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2166 { 2167 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2168 2169 PetscFunctionBegin; 2170 PetscCall(MatRealPart(a->A)); 2171 PetscCall(MatRealPart(a->B)); 2172 PetscFunctionReturn(PETSC_SUCCESS); 2173 } 2174 2175 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2176 { 2177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2178 2179 PetscFunctionBegin; 2180 PetscCall(MatImaginaryPart(a->A)); 
2181 PetscCall(MatImaginaryPart(a->B)); 2182 PetscFunctionReturn(PETSC_SUCCESS); 2183 } 2184 2185 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2186 { 2187 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2188 PetscInt i, *idxb = NULL, m = A->rmap->n; 2189 PetscScalar *va, *vv; 2190 Vec vB, vA; 2191 const PetscScalar *vb; 2192 2193 PetscFunctionBegin; 2194 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2195 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2196 2197 PetscCall(VecGetArrayWrite(vA, &va)); 2198 if (idx) { 2199 for (i = 0; i < m; i++) { 2200 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2201 } 2202 } 2203 2204 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2205 PetscCall(PetscMalloc1(m, &idxb)); 2206 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2207 2208 PetscCall(VecGetArrayWrite(v, &vv)); 2209 PetscCall(VecGetArrayRead(vB, &vb)); 2210 for (i = 0; i < m; i++) { 2211 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2212 vv[i] = vb[i]; 2213 if (idx) idx[i] = a->garray[idxb[i]]; 2214 } else { 2215 vv[i] = va[i]; 2216 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2217 } 2218 } 2219 PetscCall(VecRestoreArrayWrite(v, &vv)); 2220 PetscCall(VecRestoreArrayWrite(vA, &va)); 2221 PetscCall(VecRestoreArrayRead(vB, &vb)); 2222 PetscCall(PetscFree(idxb)); 2223 PetscCall(VecDestroy(&vA)); 2224 PetscCall(VecDestroy(&vB)); 2225 PetscFunctionReturn(PETSC_SUCCESS); 2226 } 2227 2228 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2229 { 2230 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2231 PetscInt m = A->rmap->n, n = A->cmap->n; 2232 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2233 PetscInt *cmap = mat->garray; 2234 PetscInt *diagIdx, *offdiagIdx; 2235 Vec diagV, offdiagV; 2236 PetscScalar *a, *diagA, *offdiagA; 2237 const PetscScalar *ba, *bav; 2238 PetscInt r, j, col, ncols, *bi, *bj; 2239 Mat B = mat->B; 2240 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2241 2242 PetscFunctionBegin; 2243 /* When a process holds entire A and other processes have no entry */ 2244 if (A->cmap->N == n) { 2245 PetscCall(VecGetArrayWrite(v, &diagA)); 2246 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2247 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2248 PetscCall(VecDestroy(&diagV)); 2249 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } else if (n == 0) { 2252 if (m) { 2253 PetscCall(VecGetArrayWrite(v, &a)); 2254 for (r = 0; r < m; r++) { 2255 a[r] = 0.0; 2256 if (idx) idx[r] = -1; 2257 } 2258 PetscCall(VecRestoreArrayWrite(v, &a)); 2259 } 2260 PetscFunctionReturn(PETSC_SUCCESS); 2261 } 2262 2263 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2264 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2265 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2266 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2267 2268 /* Get offdiagIdx[] for implicit 0.0 */ 2269 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2270 ba = bav; 2271 bi = b->i; 2272 bj = b->j; 2273 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2274 for (r = 0; r < m; r++) { 2275 ncols = bi[r + 1] - bi[r]; 2276 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2277 offdiagA[r] = *ba; 2278 offdiagIdx[r] = cmap[0]; 2279 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2280 offdiagA[r] = 0.0; 2281 2282 /* Find first hole in the cmap */ 2283 for (j = 0; j < ncols; j++) { 2284 col = cmap[bj[j]]; /* global column 
number = cmap[B column number] */ 2285 if (col > j && j < cstart) { 2286 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2287 break; 2288 } else if (col > j + n && j >= cstart) { 2289 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2290 break; 2291 } 2292 } 2293 if (j == ncols && ncols < A->cmap->N - n) { 2294 /* a hole is outside compressed Bcols */ 2295 if (ncols == 0) { 2296 if (cstart) { 2297 offdiagIdx[r] = 0; 2298 } else offdiagIdx[r] = cend; 2299 } else { /* ncols > 0 */ 2300 offdiagIdx[r] = cmap[ncols - 1] + 1; 2301 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2302 } 2303 } 2304 } 2305 2306 for (j = 0; j < ncols; j++) { 2307 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2308 offdiagA[r] = *ba; 2309 offdiagIdx[r] = cmap[*bj]; 2310 } 2311 ba++; 2312 bj++; 2313 } 2314 } 2315 2316 PetscCall(VecGetArrayWrite(v, &a)); 2317 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2318 for (r = 0; r < m; ++r) { 2319 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2320 a[r] = diagA[r]; 2321 if (idx) idx[r] = cstart + diagIdx[r]; 2322 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2323 a[r] = diagA[r]; 2324 if (idx) { 2325 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2326 idx[r] = cstart + diagIdx[r]; 2327 } else idx[r] = offdiagIdx[r]; 2328 } 2329 } else { 2330 a[r] = offdiagA[r]; 2331 if (idx) idx[r] = offdiagIdx[r]; 2332 } 2333 } 2334 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2335 PetscCall(VecRestoreArrayWrite(v, &a)); 2336 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2337 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2338 PetscCall(VecDestroy(&diagV)); 2339 PetscCall(VecDestroy(&offdiagV)); 2340 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2341 PetscFunctionReturn(PETSC_SUCCESS); 2342 } 2343 2344 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2345 { 2346 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2347 PetscInt m = A->rmap->n, n = A->cmap->n; 2348 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2349 PetscInt *cmap = mat->garray; 2350 PetscInt *diagIdx, *offdiagIdx; 2351 Vec diagV, offdiagV; 2352 PetscScalar *a, *diagA, *offdiagA; 2353 const PetscScalar *ba, *bav; 2354 PetscInt r, j, col, ncols, *bi, *bj; 2355 Mat B = mat->B; 2356 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2357 2358 PetscFunctionBegin; 2359 /* When a process holds entire A and other processes have no entry */ 2360 if (A->cmap->N == n) { 2361 PetscCall(VecGetArrayWrite(v, &diagA)); 2362 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2363 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2364 PetscCall(VecDestroy(&diagV)); 2365 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } else if (n == 0) { 2368 if (m) { 2369 PetscCall(VecGetArrayWrite(v, &a)); 2370 for (r = 0; r < m; r++) { 2371 a[r] = PETSC_MAX_REAL; 2372 if (idx) idx[r] = -1; 2373 } 2374 PetscCall(VecRestoreArrayWrite(v, &a)); 2375 } 2376 PetscFunctionReturn(PETSC_SUCCESS); 2377 } 2378 2379 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2380 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2381 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2382 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2383 2384 /* Get offdiagIdx[] for implicit 0.0 */ 2385 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2386 ba = bav; 2387 bi = b->i; 2388 bj = b->j; 2389 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2390 for (r = 0; r < m; r++) { 2391 
ncols = bi[r + 1] - bi[r]; 2392 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2393 offdiagA[r] = *ba; 2394 offdiagIdx[r] = cmap[0]; 2395 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2396 offdiagA[r] = 0.0; 2397 2398 /* Find first hole in the cmap */ 2399 for (j = 0; j < ncols; j++) { 2400 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2401 if (col > j && j < cstart) { 2402 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2403 break; 2404 } else if (col > j + n && j >= cstart) { 2405 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2406 break; 2407 } 2408 } 2409 if (j == ncols && ncols < A->cmap->N - n) { 2410 /* a hole is outside compressed Bcols */ 2411 if (ncols == 0) { 2412 if (cstart) { 2413 offdiagIdx[r] = 0; 2414 } else offdiagIdx[r] = cend; 2415 } else { /* ncols > 0 */ 2416 offdiagIdx[r] = cmap[ncols - 1] + 1; 2417 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2418 } 2419 } 2420 } 2421 2422 for (j = 0; j < ncols; j++) { 2423 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2424 offdiagA[r] = *ba; 2425 offdiagIdx[r] = cmap[*bj]; 2426 } 2427 ba++; 2428 bj++; 2429 } 2430 } 2431 2432 PetscCall(VecGetArrayWrite(v, &a)); 2433 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2434 for (r = 0; r < m; ++r) { 2435 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2436 a[r] = diagA[r]; 2437 if (idx) idx[r] = cstart + diagIdx[r]; 2438 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2439 a[r] = diagA[r]; 2440 if (idx) { 2441 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2442 idx[r] = cstart + diagIdx[r]; 2443 } else idx[r] = offdiagIdx[r]; 2444 } 2445 } else { 2446 a[r] = offdiagA[r]; 2447 if (idx) idx[r] = offdiagIdx[r]; 2448 } 2449 } 2450 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2451 PetscCall(VecRestoreArrayWrite(v, &a)); 2452 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2453 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2454 PetscCall(VecDestroy(&diagV)); 2455 PetscCall(VecDestroy(&offdiagV)); 2456 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2457 PetscFunctionReturn(PETSC_SUCCESS); 2458 } 2459 2460 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2461 { 2462 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2463 PetscInt m = A->rmap->n, n = A->cmap->n; 2464 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2465 PetscInt *cmap = mat->garray; 2466 PetscInt *diagIdx, *offdiagIdx; 2467 Vec diagV, offdiagV; 2468 PetscScalar *a, *diagA, *offdiagA; 2469 const PetscScalar *ba, *bav; 2470 PetscInt r, j, col, ncols, *bi, *bj; 2471 Mat B = mat->B; 2472 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2473 2474 PetscFunctionBegin; 2475 /* When a process holds entire A and other processes have no entry */ 2476 if (A->cmap->N == n) { 2477 PetscCall(VecGetArrayWrite(v, &diagA)); 2478 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2479 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2480 PetscCall(VecDestroy(&diagV)); 2481 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2482 PetscFunctionReturn(PETSC_SUCCESS); 2483 } else if (n == 0) { 2484 if (m) { 2485 PetscCall(VecGetArrayWrite(v, &a)); 2486 for (r = 0; r < m; r++) { 2487 a[r] = PETSC_MIN_REAL; 2488 if (idx) idx[r] = -1; 2489 } 2490 PetscCall(VecRestoreArrayWrite(v, &a)); 2491 } 2492 PetscFunctionReturn(PETSC_SUCCESS); 2493 } 2494 2495 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2496 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 
2497 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2498 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2499 2500 /* Get offdiagIdx[] for implicit 0.0 */ 2501 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2502 ba = bav; 2503 bi = b->i; 2504 bj = b->j; 2505 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2506 for (r = 0; r < m; r++) { 2507 ncols = bi[r + 1] - bi[r]; 2508 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2509 offdiagA[r] = *ba; 2510 offdiagIdx[r] = cmap[0]; 2511 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2512 offdiagA[r] = 0.0; 2513 2514 /* Find first hole in the cmap */ 2515 for (j = 0; j < ncols; j++) { 2516 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2517 if (col > j && j < cstart) { 2518 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2519 break; 2520 } else if (col > j + n && j >= cstart) { 2521 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2522 break; 2523 } 2524 } 2525 if (j == ncols && ncols < A->cmap->N - n) { 2526 /* a hole is outside compressed Bcols */ 2527 if (ncols == 0) { 2528 if (cstart) { 2529 offdiagIdx[r] = 0; 2530 } else offdiagIdx[r] = cend; 2531 } else { /* ncols > 0 */ 2532 offdiagIdx[r] = cmap[ncols - 1] + 1; 2533 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2534 } 2535 } 2536 } 2537 2538 for (j = 0; j < ncols; j++) { 2539 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2540 offdiagA[r] = *ba; 2541 offdiagIdx[r] = cmap[*bj]; 2542 } 2543 ba++; 2544 bj++; 2545 } 2546 } 2547 2548 PetscCall(VecGetArrayWrite(v, &a)); 2549 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2550 for (r = 0; r < m; ++r) { 2551 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2552 a[r] = diagA[r]; 2553 if (idx) idx[r] = cstart + diagIdx[r]; 2554 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2555 a[r] = diagA[r]; 2556 if (idx) { 2557 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2558 idx[r] = cstart + diagIdx[r]; 2559 } else idx[r] = offdiagIdx[r]; 2560 } 2561 } else { 2562 a[r] = offdiagA[r]; 2563 if (idx) idx[r] = offdiagIdx[r]; 2564 } 2565 } 2566 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2567 PetscCall(VecRestoreArrayWrite(v, &a)); 2568 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2569 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2570 PetscCall(VecDestroy(&diagV)); 2571 PetscCall(VecDestroy(&offdiagV)); 2572 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2573 PetscFunctionReturn(PETSC_SUCCESS); 2574 } 2575 2576 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2577 { 2578 Mat *dummy; 2579 2580 PetscFunctionBegin; 2581 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2582 *newmat = *dummy; 2583 PetscCall(PetscFree(dummy)); 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2588 { 2589 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2590 2591 PetscFunctionBegin; 2592 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2593 A->factorerrortype = a->A->factorerrortype; 2594 PetscFunctionReturn(PETSC_SUCCESS); 2595 } 2596 2597 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2598 { 2599 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2600 2601 PetscFunctionBegin; 2602 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ 
is not allowed"); 2603 PetscCall(MatSetRandom(aij->A, rctx)); 2604 if (x->assembled) { 2605 PetscCall(MatSetRandom(aij->B, rctx)); 2606 } else { 2607 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2608 } 2609 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2610 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2611 PetscFunctionReturn(PETSC_SUCCESS); 2612 } 2613 2614 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2615 { 2616 PetscFunctionBegin; 2617 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2618 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2619 PetscFunctionReturn(PETSC_SUCCESS); 2620 } 2621 2622 /*@ 2623 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2624 2625 Not collective 2626 2627 Input Parameter: 2628 . A - the matrix 2629 2630 Output Parameter: 2631 . nz - the number of nonzeros 2632 2633 Level: advanced 2634 2635 .seealso: `MATMPIAIJ`, `Mat` 2636 @*/ 2637 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2638 { 2639 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2640 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2641 2642 PetscFunctionBegin; 2643 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2644 PetscFunctionReturn(PETSC_SUCCESS); 2645 } 2646 2647 /*@ 2648 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2649 2650 Collective 2651 2652 Input Parameters: 2653 + A - the matrix 2654 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2655 2656 Level: advanced 2657 2658 .seealso: `Mat`, `MATMPIAIJ` 2659 @*/ 2660 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2661 { 2662 PetscFunctionBegin; 2663 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2664 PetscFunctionReturn(PETSC_SUCCESS); 2665 } 2666 2667 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2668 { 2669 PetscBool sc = PETSC_FALSE, flg; 2670 2671 PetscFunctionBegin; 2672 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2673 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2674 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2675 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2676 PetscOptionsHeadEnd(); 2677 PetscFunctionReturn(PETSC_SUCCESS); 2678 } 2679 2680 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2681 { 2682 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2683 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2684 2685 PetscFunctionBegin; 2686 if (!Y->preallocated) { 2687 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2688 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2689 PetscInt nonew = aij->nonew; 2690 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2691 aij->nonew = nonew; 2692 } 2693 PetscCall(MatShift_Basic(Y, a)); 2694 PetscFunctionReturn(PETSC_SUCCESS); 2695 } 2696 2697 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2698 { 2699 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2700 2701 PetscFunctionBegin; 2702 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2703 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2704 if (d) { 2705 PetscInt rstart; 2706 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2707 *d += rstart; 2708 } 2709 PetscFunctionReturn(PETSC_SUCCESS); 2710 } 2711 2712 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2713 { 2714 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2715 2716 PetscFunctionBegin; 2717 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2718 PetscFunctionReturn(PETSC_SUCCESS); 2719 } 2720 2721 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A) 2722 { 2723 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2724 2725 PetscFunctionBegin; 2726 PetscCall(MatEliminateZeros(a->A)); 2727 PetscCall(MatEliminateZeros(a->B)); 2728 PetscFunctionReturn(PETSC_SUCCESS); 2729 } 2730 2731 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2732 MatGetRow_MPIAIJ, 2733 MatRestoreRow_MPIAIJ, 2734 MatMult_MPIAIJ, 2735 /* 4*/ MatMultAdd_MPIAIJ, 2736 MatMultTranspose_MPIAIJ, 2737 MatMultTransposeAdd_MPIAIJ, 2738 NULL, 2739 NULL, 2740 NULL, 2741 /*10*/ NULL, 2742 NULL, 2743 NULL, 2744 MatSOR_MPIAIJ, 2745 MatTranspose_MPIAIJ, 2746 /*15*/ MatGetInfo_MPIAIJ, 2747 MatEqual_MPIAIJ, 2748 MatGetDiagonal_MPIAIJ, 2749 MatDiagonalScale_MPIAIJ, 2750 MatNorm_MPIAIJ, 2751 /*20*/ MatAssemblyBegin_MPIAIJ, 2752 MatAssemblyEnd_MPIAIJ, 2753 MatSetOption_MPIAIJ, 2754 MatZeroEntries_MPIAIJ, 2755 /*24*/ MatZeroRows_MPIAIJ, 2756 NULL, 2757 NULL, 2758 NULL, 2759 NULL, 2760 /*29*/ MatSetUp_MPI_Hash, 2761 NULL, 2762 NULL, 2763 MatGetDiagonalBlock_MPIAIJ, 2764 NULL, 2765 /*34*/ MatDuplicate_MPIAIJ, 2766 NULL, 2767 NULL, 2768 NULL, 2769 NULL, 2770 /*39*/ MatAXPY_MPIAIJ, 2771 MatCreateSubMatrices_MPIAIJ, 2772 MatIncreaseOverlap_MPIAIJ, 2773 MatGetValues_MPIAIJ, 2774 MatCopy_MPIAIJ, 2775 /*44*/ MatGetRowMax_MPIAIJ, 2776 MatScale_MPIAIJ, 2777 MatShift_MPIAIJ, 2778 MatDiagonalSet_MPIAIJ, 2779 MatZeroRowsColumns_MPIAIJ, 2780 /*49*/ MatSetRandom_MPIAIJ, 2781 MatGetRowIJ_MPIAIJ, 2782 MatRestoreRowIJ_MPIAIJ, 2783 NULL, 2784 NULL, 2785 /*54*/ MatFDColoringCreate_MPIXAIJ, 2786 NULL, 2787 MatSetUnfactored_MPIAIJ, 2788 MatPermute_MPIAIJ, 2789 NULL, 2790 /*59*/ MatCreateSubMatrix_MPIAIJ, 2791 MatDestroy_MPIAIJ, 2792 MatView_MPIAIJ, 2793 NULL, 2794 NULL, 2795 /*64*/ NULL, 2796 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2797 NULL, 2798 NULL, 2799 NULL, 2800 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2801 MatGetRowMinAbs_MPIAIJ, 2802 NULL, 2803 NULL, 2804 NULL, 2805 NULL, 2806 /*75*/ MatFDColoringApply_AIJ, 2807 MatSetFromOptions_MPIAIJ, 2808 NULL, 2809 NULL, 2810 MatFindZeroDiagonals_MPIAIJ, 2811 /*80*/ NULL, 2812 NULL, 2813 NULL, 2814 /*83*/ MatLoad_MPIAIJ, 2815 MatIsSymmetric_MPIAIJ, 2816 NULL, 2817 NULL, 2818 NULL, 2819 NULL, 2820 /*89*/ NULL, 2821 NULL, 2822 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2823 NULL, 2824 NULL, 2825 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2826 NULL, 2827 NULL, 2828 NULL, 2829 MatBindToCPU_MPIAIJ, 2830 /*99*/ MatProductSetFromOptions_MPIAIJ, 2831 NULL, 2832 NULL, 2833 MatConjugate_MPIAIJ, 2834 NULL, 
2835 /*104*/ MatSetValuesRow_MPIAIJ, 2836 MatRealPart_MPIAIJ, 2837 MatImaginaryPart_MPIAIJ, 2838 NULL, 2839 NULL, 2840 /*109*/ NULL, 2841 NULL, 2842 MatGetRowMin_MPIAIJ, 2843 NULL, 2844 MatMissingDiagonal_MPIAIJ, 2845 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2846 NULL, 2847 MatGetGhosts_MPIAIJ, 2848 NULL, 2849 NULL, 2850 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2851 NULL, 2852 NULL, 2853 NULL, 2854 MatGetMultiProcBlock_MPIAIJ, 2855 /*124*/ MatFindNonzeroRows_MPIAIJ, 2856 MatGetColumnReductions_MPIAIJ, 2857 MatInvertBlockDiagonal_MPIAIJ, 2858 MatInvertVariableBlockDiagonal_MPIAIJ, 2859 MatCreateSubMatricesMPI_MPIAIJ, 2860 /*129*/ NULL, 2861 NULL, 2862 NULL, 2863 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2864 NULL, 2865 /*134*/ NULL, 2866 NULL, 2867 NULL, 2868 NULL, 2869 NULL, 2870 /*139*/ MatSetBlockSizes_MPIAIJ, 2871 NULL, 2872 NULL, 2873 MatFDColoringSetUp_MPIXAIJ, 2874 MatFindOffBlockDiagonalEntries_MPIAIJ, 2875 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2876 /*145*/ NULL, 2877 NULL, 2878 NULL, 2879 MatCreateGraph_Simple_AIJ, 2880 NULL, 2881 /*150*/ NULL, 2882 MatEliminateZeros_MPIAIJ}; 2883 2884 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2885 { 2886 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2887 2888 PetscFunctionBegin; 2889 PetscCall(MatStoreValues(aij->A)); 2890 PetscCall(MatStoreValues(aij->B)); 2891 PetscFunctionReturn(PETSC_SUCCESS); 2892 } 2893 2894 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2895 { 2896 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2897 2898 PetscFunctionBegin; 2899 PetscCall(MatRetrieveValues(aij->A)); 2900 PetscCall(MatRetrieveValues(aij->B)); 2901 PetscFunctionReturn(PETSC_SUCCESS); 2902 } 2903 2904 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2905 { 2906 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2907 PetscMPIInt size; 2908 2909 PetscFunctionBegin; 2910 if (B->hash_active) { 2911 PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops)))); 2912 B->hash_active = PETSC_FALSE; 2913 } 2914 PetscCall(PetscLayoutSetUp(B->rmap)); 2915 PetscCall(PetscLayoutSetUp(B->cmap)); 2916 2917 #if defined(PETSC_USE_CTABLE) 2918 PetscCall(PetscHMapIDestroy(&b->colmap)); 2919 #else 2920 PetscCall(PetscFree(b->colmap)); 2921 #endif 2922 PetscCall(PetscFree(b->garray)); 2923 PetscCall(VecDestroy(&b->lvec)); 2924 PetscCall(VecScatterDestroy(&b->Mvctx)); 2925 2926 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2927 PetscCall(MatDestroy(&b->B)); 2928 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2929 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2930 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2931 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2932 2933 PetscCall(MatDestroy(&b->A)); 2934 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2935 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2936 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2937 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2938 2939 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2940 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2941 B->preallocated = PETSC_TRUE; 2942 B->was_assembled = PETSC_FALSE; 2943 B->assembled = PETSC_FALSE; 2944 PetscFunctionReturn(PETSC_SUCCESS); 2945 } 2946 2947 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2948 { 2949 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2950 2951 PetscFunctionBegin; 2952 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2953 PetscCall(PetscLayoutSetUp(B->rmap)); 2954 PetscCall(PetscLayoutSetUp(B->cmap)); 2955 2956 #if defined(PETSC_USE_CTABLE) 2957 PetscCall(PetscHMapIDestroy(&b->colmap)); 2958 #else 2959 PetscCall(PetscFree(b->colmap)); 2960 #endif 2961 PetscCall(PetscFree(b->garray)); 2962 PetscCall(VecDestroy(&b->lvec)); 2963 PetscCall(VecScatterDestroy(&b->Mvctx)); 2964 2965 PetscCall(MatResetPreallocation(b->A)); 2966 PetscCall(MatResetPreallocation(b->B)); 2967 B->preallocated = PETSC_TRUE; 2968 B->was_assembled = PETSC_FALSE; 2969 B->assembled = PETSC_FALSE; 2970 PetscFunctionReturn(PETSC_SUCCESS); 2971 } 2972 2973 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2974 { 2975 Mat mat; 2976 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2977 2978 PetscFunctionBegin; 2979 *newmat = NULL; 2980 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2981 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2982 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2983 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2984 a = (Mat_MPIAIJ *)mat->data; 2985 2986 mat->factortype = matin->factortype; 2987 mat->assembled = matin->assembled; 2988 mat->insertmode = NOT_SET_VALUES; 2989 mat->preallocated = matin->preallocated; 2990 2991 a->size = oldmat->size; 2992 a->rank = oldmat->rank; 2993 a->donotstash = oldmat->donotstash; 2994 a->roworiented = oldmat->roworiented; 2995 a->rowindices = NULL; 2996 a->rowvalues = NULL; 2997 a->getrowactive = PETSC_FALSE; 2998 2999 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3000 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3001 3002 if (oldmat->colmap) { 3003 #if defined(PETSC_USE_CTABLE) 3004 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3005 #else 3006 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3007 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3008 #endif 3009 } else a->colmap = NULL; 3010 if (oldmat->garray) { 3011 PetscInt len; 3012 len = oldmat->B->cmap->n; 3013 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3014 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3015 } else a->garray = NULL; 3016 3017 /* It may happen MatDuplicate is called with a non-assembled matrix 3018 In fact, MatDuplicate only requires the matrix to be preallocated 3019 This may happen inside a DMCreateMatrix_Shell */ 3020 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3021 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3022 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3023 PetscCall(MatDuplicate(oldmat->B, 
cpvalues, &a->B)); 3024 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3025 *newmat = mat; 3026 PetscFunctionReturn(PETSC_SUCCESS); 3027 } 3028 3029 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3030 { 3031 PetscBool isbinary, ishdf5; 3032 3033 PetscFunctionBegin; 3034 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3035 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3036 /* force binary viewer to load .info file if it has not yet done so */ 3037 PetscCall(PetscViewerSetUp(viewer)); 3038 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3039 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3040 if (isbinary) { 3041 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3042 } else if (ishdf5) { 3043 #if defined(PETSC_HAVE_HDF5) 3044 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3045 #else 3046 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3047 #endif 3048 } else { 3049 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3050 } 3051 PetscFunctionReturn(PETSC_SUCCESS); 3052 } 3053 3054 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3055 { 3056 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3057 PetscInt *rowidxs, *colidxs; 3058 PetscScalar *matvals; 3059 3060 PetscFunctionBegin; 3061 PetscCall(PetscViewerSetUp(viewer)); 3062 3063 /* read in matrix header */ 3064 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3065 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3066 M = header[1]; 3067 N = header[2]; 3068 nz = header[3]; 3069 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3070 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3071 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3072 3073 /* set block sizes from the viewer's .info file */ 3074 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3075 /* set global sizes if not set already */ 3076 if (mat->rmap->N < 0) mat->rmap->N = M; 3077 if (mat->cmap->N < 0) mat->cmap->N = N; 3078 PetscCall(PetscLayoutSetUp(mat->rmap)); 3079 PetscCall(PetscLayoutSetUp(mat->cmap)); 3080 3081 /* check if the matrix sizes are correct */ 3082 PetscCall(MatGetSize(mat, &rows, &cols)); 3083 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3084 3085 /* read in row lengths and build row indices */ 3086 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3087 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3088 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3089 rowidxs[0] = 0; 3090 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3091 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3092 PetscCheck(sum == nz, 
PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3093 /* read in column indices and matrix values */ 3094 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3095 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3096 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3097 /* store matrix indices and values */ 3098 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3099 PetscCall(PetscFree(rowidxs)); 3100 PetscCall(PetscFree2(colidxs, matvals)); 3101 PetscFunctionReturn(PETSC_SUCCESS); 3102 } 3103 3104 /* Not scalable because of ISAllGather() unless getting all columns. */ 3105 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3106 { 3107 IS iscol_local; 3108 PetscBool isstride; 3109 PetscMPIInt lisstride = 0, gisstride; 3110 3111 PetscFunctionBegin; 3112 /* check if we are grabbing all columns*/ 3113 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3114 3115 if (isstride) { 3116 PetscInt start, len, mstart, mlen; 3117 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3118 PetscCall(ISGetLocalSize(iscol, &len)); 3119 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3120 if (mstart == start && mlen - mstart == len) lisstride = 1; 3121 } 3122 3123 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3124 if (gisstride) { 3125 PetscInt N; 3126 PetscCall(MatGetSize(mat, NULL, &N)); 3127 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3128 PetscCall(ISSetIdentity(iscol_local)); 3129 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3130 } else { 3131 PetscInt cbs; 3132 PetscCall(ISGetBlockSize(iscol, &cbs)); 3133 PetscCall(ISAllGather(iscol, &iscol_local)); 3134 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3135 } 3136 3137 *isseq = iscol_local; 3138 PetscFunctionReturn(PETSC_SUCCESS); 3139 } 3140 3141 /* 3142 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3143 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3144 3145 Input Parameters: 3146 + mat - matrix 3147 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3148 i.e., mat->rstart <= isrow[i] < mat->rend 3149 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3150 i.e., mat->cstart <= iscol[i] < mat->cend 3151 3152 Output Parameters: 3153 + isrow_d - sequential row index set for retrieving mat->A 3154 . iscol_d - sequential column index set for retrieving mat->A 3155 . 
iscol_o - sequential column index set for retrieving mat->B 3156 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3157 */ 3158 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3159 { 3160 Vec x, cmap; 3161 const PetscInt *is_idx; 3162 PetscScalar *xarray, *cmaparray; 3163 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3164 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3165 Mat B = a->B; 3166 Vec lvec = a->lvec, lcmap; 3167 PetscInt i, cstart, cend, Bn = B->cmap->N; 3168 MPI_Comm comm; 3169 VecScatter Mvctx = a->Mvctx; 3170 3171 PetscFunctionBegin; 3172 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3173 PetscCall(ISGetLocalSize(iscol, &ncols)); 3174 3175 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3176 PetscCall(MatCreateVecs(mat, &x, NULL)); 3177 PetscCall(VecSet(x, -1.0)); 3178 PetscCall(VecDuplicate(x, &cmap)); 3179 PetscCall(VecSet(cmap, -1.0)); 3180 3181 /* Get start indices */ 3182 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3183 isstart -= ncols; 3184 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3185 3186 PetscCall(ISGetIndices(iscol, &is_idx)); 3187 PetscCall(VecGetArray(x, &xarray)); 3188 PetscCall(VecGetArray(cmap, &cmaparray)); 3189 PetscCall(PetscMalloc1(ncols, &idx)); 3190 for (i = 0; i < ncols; i++) { 3191 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3192 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3193 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3194 } 3195 PetscCall(VecRestoreArray(x, &xarray)); 3196 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3197 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3198 3199 /* Get iscol_d */ 3200 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3201 PetscCall(ISGetBlockSize(iscol, &i)); 3202 PetscCall(ISSetBlockSize(*iscol_d, i)); 3203 3204 /* Get isrow_d */ 3205 PetscCall(ISGetLocalSize(isrow, &m)); 3206 rstart = mat->rmap->rstart; 3207 PetscCall(PetscMalloc1(m, &idx)); 3208 PetscCall(ISGetIndices(isrow, &is_idx)); 3209 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3210 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3211 3212 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3213 PetscCall(ISGetBlockSize(isrow, &i)); 3214 PetscCall(ISSetBlockSize(*isrow_d, i)); 3215 3216 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3217 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3218 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3219 3220 PetscCall(VecDuplicate(lvec, &lcmap)); 3221 3222 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3223 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3224 3225 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3226 /* off-process column indices */ 3227 count = 0; 3228 PetscCall(PetscMalloc1(Bn, &idx)); 3229 PetscCall(PetscMalloc1(Bn, &cmap1)); 3230 3231 PetscCall(VecGetArray(lvec, &xarray)); 3232 PetscCall(VecGetArray(lcmap, &cmaparray)); 3233 for (i = 0; i < Bn; i++) { 3234 if (PetscRealPart(xarray[i]) > -1.0) { 3235 idx[count] = i; /* local column index in off-diagonal part B */ 3236 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3237 count++; 
3238 } 3239 } 3240 PetscCall(VecRestoreArray(lvec, &xarray)); 3241 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3242 3243 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3244 /* cannot ensure iscol_o has same blocksize as iscol! */ 3245 3246 PetscCall(PetscFree(idx)); 3247 *garray = cmap1; 3248 3249 PetscCall(VecDestroy(&x)); 3250 PetscCall(VecDestroy(&cmap)); 3251 PetscCall(VecDestroy(&lcmap)); 3252 PetscFunctionReturn(PETSC_SUCCESS); 3253 } 3254 3255 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3256 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3257 { 3258 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3259 Mat M = NULL; 3260 MPI_Comm comm; 3261 IS iscol_d, isrow_d, iscol_o; 3262 Mat Asub = NULL, Bsub = NULL; 3263 PetscInt n; 3264 3265 PetscFunctionBegin; 3266 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3267 3268 if (call == MAT_REUSE_MATRIX) { 3269 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3270 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3271 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3272 3273 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3274 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3275 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3277 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3278 3279 /* Update diagonal and off-diagonal portions of submat */ 3280 asub = (Mat_MPIAIJ *)(*submat)->data; 3281 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3282 PetscCall(ISGetLocalSize(iscol_o, &n)); 3283 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3284 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3285 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3286 3287 } else { /* call == MAT_INITIAL_MATRIX) */ 3288 const PetscInt *garray; 3289 PetscInt BsubN; 3290 3291 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3292 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3293 3294 /* Create local submatrices Asub and Bsub */ 3295 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3296 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3297 3298 /* Create submatrix M */ 3299 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3300 3301 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3302 asub = (Mat_MPIAIJ *)M->data; 3303 3304 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3305 n = asub->B->cmap->N; 3306 if (BsubN > n) { 3307 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3308 const PetscInt *idx; 3309 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3310 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3311 3312 PetscCall(PetscMalloc1(n, &idx_new)); 3313 j = 0; 3314 PetscCall(ISGetIndices(iscol_o, &idx)); 3315 for (i = 0; i < n; i++) { 3316 if (j >= BsubN) break; 3317 while (subgarray[i] > garray[j]) j++; 3318 3319 if (subgarray[i] == garray[j]) { 3320 idx_new[i] = idx[j++]; 3321 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3322 } 3323 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3324 3325 PetscCall(ISDestroy(&iscol_o)); 3326 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3327 3328 } else if (BsubN < n) { 3329 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3330 } 3331 3332 PetscCall(PetscFree(garray)); 3333 *submat = M; 3334 3335 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3336 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3337 PetscCall(ISDestroy(&isrow_d)); 3338 3339 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3340 PetscCall(ISDestroy(&iscol_d)); 3341 3342 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3343 PetscCall(ISDestroy(&iscol_o)); 3344 } 3345 PetscFunctionReturn(PETSC_SUCCESS); 3346 } 3347 3348 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3349 { 3350 IS iscol_local = NULL, isrow_d; 3351 PetscInt csize; 3352 PetscInt n, i, j, start, end; 3353 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3354 MPI_Comm comm; 3355 3356 PetscFunctionBegin; 3357 /* If isrow has same processor distribution as mat, 3358 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3359 if (call == MAT_REUSE_MATRIX) { 3360 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3361 if (isrow_d) { 3362 sameRowDist = PETSC_TRUE; 3363 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3364 } else { 3365 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3366 if (iscol_local) { 3367 sameRowDist = PETSC_TRUE; 3368 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3369 } 3370 } 3371 } else { 3372 /* Check if isrow has same processor distribution as mat */ 3373 sameDist[0] = PETSC_FALSE; 3374 
PetscCall(ISGetLocalSize(isrow, &n)); 3375 if (!n) { 3376 sameDist[0] = PETSC_TRUE; 3377 } else { 3378 PetscCall(ISGetMinMax(isrow, &i, &j)); 3379 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3380 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3381 } 3382 3383 /* Check if iscol has same processor distribution as mat */ 3384 sameDist[1] = PETSC_FALSE; 3385 PetscCall(ISGetLocalSize(iscol, &n)); 3386 if (!n) { 3387 sameDist[1] = PETSC_TRUE; 3388 } else { 3389 PetscCall(ISGetMinMax(iscol, &i, &j)); 3390 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3391 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3392 } 3393 3394 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3395 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3396 sameRowDist = tsameDist[0]; 3397 } 3398 3399 if (sameRowDist) { 3400 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3401 /* isrow and iscol have same processor distribution as mat */ 3402 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3403 PetscFunctionReturn(PETSC_SUCCESS); 3404 } else { /* sameRowDist */ 3405 /* isrow has same processor distribution as mat */ 3406 if (call == MAT_INITIAL_MATRIX) { 3407 PetscBool sorted; 3408 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3409 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3410 PetscCall(ISGetSize(iscol, &i)); 3411 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3412 3413 PetscCall(ISSorted(iscol_local, &sorted)); 3414 if (sorted) { 3415 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3416 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3417 PetscFunctionReturn(PETSC_SUCCESS); 3418 } 3419 } else { /* call == MAT_REUSE_MATRIX */ 3420 IS iscol_sub; 3421 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3422 if (iscol_sub) { 3423 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3424 PetscFunctionReturn(PETSC_SUCCESS); 3425 } 3426 } 3427 } 3428 } 3429 3430 /* General case: iscol -> iscol_local which has global size of iscol */ 3431 if (call == MAT_REUSE_MATRIX) { 3432 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3433 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3434 } else { 3435 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3436 } 3437 3438 PetscCall(ISGetLocalSize(iscol, &csize)); 3439 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3440 3441 if (call == MAT_INITIAL_MATRIX) { 3442 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3443 PetscCall(ISDestroy(&iscol_local)); 3444 } 3445 PetscFunctionReturn(PETSC_SUCCESS); 3446 } 3447 3448 /*@C 3449 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3450 and "off-diagonal" part of the matrix in CSR format. 3451 3452 Collective 3453 3454 Input Parameters: 3455 + comm - MPI communicator 3456 . A - "diagonal" portion of matrix 3457 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3458 - garray - global index of B columns 3459 3460 Output Parameter: 3461 . mat - the matrix, with input A as its local diagonal matrix 3462 3463 Level: advanced 3464 3465 Notes: 3466 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3467 3468 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3469 3470 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3471 @*/ 3472 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3473 { 3474 Mat_MPIAIJ *maij; 3475 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3476 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3477 const PetscScalar *oa; 3478 Mat Bnew; 3479 PetscInt m, n, N; 3480 MatType mpi_mat_type; 3481 3482 PetscFunctionBegin; 3483 PetscCall(MatCreate(comm, mat)); 3484 PetscCall(MatGetSize(A, &m, &n)); 3485 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3486 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3487 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3488 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3489 3490 /* Get global columns of mat */ 3491 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3492 3493 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3494 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3495 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3496 PetscCall(MatSetType(*mat, mpi_mat_type)); 3497 3498 PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3499 maij = (Mat_MPIAIJ *)(*mat)->data; 3500 3501 (*mat)->preallocated = PETSC_TRUE; 3502 3503 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3504 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3505 3506 /* Set A as diagonal portion of *mat */ 3507 maij->A = A; 3508 3509 nz = oi[m]; 3510 for (i = 0; i < nz; i++) { 3511 col = oj[i]; 3512 oj[i] = garray[col]; 3513 } 3514 3515 /* Set Bnew as off-diagonal portion of *mat */ 3516 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3517 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3518 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3519 bnew = (Mat_SeqAIJ *)Bnew->data; 3520 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3521 maij->B = Bnew; 3522 3523 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3524 3525 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3526 b->free_a = PETSC_FALSE; 3527 b->free_ij = PETSC_FALSE; 3528 PetscCall(MatDestroy(&B)); 3529 3530 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3531 bnew->free_a = PETSC_TRUE; 3532 bnew->free_ij = PETSC_TRUE; 3533 3534 /* condense columns of maij->B */ 3535 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3536 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3537 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3538 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3539 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3540 PetscFunctionReturn(PETSC_SUCCESS); 3541 } 3542 3543 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3544 3545 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3546 { 3547 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3548 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3549 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3550 Mat M, Msub, B = a->B; 3551 MatScalar *aa; 3552 Mat_SeqAIJ *aij; 3553 PetscInt *garray = a->garray, *colsub, Ncols; 3554 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3555 IS iscol_sub, iscmap; 3556 const PetscInt *is_idx, *cmap; 3557 PetscBool allcolumns = PETSC_FALSE; 3558 MPI_Comm comm; 3559 3560 PetscFunctionBegin; 3561 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3562 if (call == MAT_REUSE_MATRIX) { 3563 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3564 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3565 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3566 3567 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3568 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3569 3570 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3571 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3572 3573 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, 
&iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3574 3575 } else { /* call == MAT_INITIAL_MATRIX) */ 3576 PetscBool flg; 3577 3578 PetscCall(ISGetLocalSize(iscol, &n)); 3579 PetscCall(ISGetSize(iscol, &Ncols)); 3580 3581 /* (1) iscol -> nonscalable iscol_local */ 3582 /* Check for special case: each processor gets entire matrix columns */ 3583 PetscCall(ISIdentity(iscol_local, &flg)); 3584 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3585 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3586 if (allcolumns) { 3587 iscol_sub = iscol_local; 3588 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3589 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3590 3591 } else { 3592 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3593 PetscInt *idx, *cmap1, k; 3594 PetscCall(PetscMalloc1(Ncols, &idx)); 3595 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3596 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3597 count = 0; 3598 k = 0; 3599 for (i = 0; i < Ncols; i++) { 3600 j = is_idx[i]; 3601 if (j >= cstart && j < cend) { 3602 /* diagonal part of mat */ 3603 idx[count] = j; 3604 cmap1[count++] = i; /* column index in submat */ 3605 } else if (Bn) { 3606 /* off-diagonal part of mat */ 3607 if (j == garray[k]) { 3608 idx[count] = j; 3609 cmap1[count++] = i; /* column index in submat */ 3610 } else if (j > garray[k]) { 3611 while (j > garray[k] && k < Bn - 1) k++; 3612 if (j == garray[k]) { 3613 idx[count] = j; 3614 cmap1[count++] = i; /* column index in submat */ 3615 } 3616 } 3617 } 3618 } 3619 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3620 3621 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3622 PetscCall(ISGetBlockSize(iscol, &cbs)); 3623 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3624 3625 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3626 } 3627 3628 /* (3) Create sequential Msub */ 3629 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3630 } 3631 3632 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3633 aij = (Mat_SeqAIJ *)(Msub)->data; 3634 ii = aij->i; 3635 PetscCall(ISGetIndices(iscmap, &cmap)); 3636 3637 /* 3638 m - number of local rows 3639 Ncols - number of columns (same on all processors) 3640 rstart - first row in new global matrix generated 3641 */ 3642 PetscCall(MatGetSize(Msub, &m, NULL)); 3643 3644 if (call == MAT_INITIAL_MATRIX) { 3645 /* (4) Create parallel newmat */ 3646 PetscMPIInt rank, size; 3647 PetscInt csize; 3648 3649 PetscCallMPI(MPI_Comm_size(comm, &size)); 3650 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3651 3652 /* 3653 Determine the number of non-zeros in the diagonal and off-diagonal 3654 portions of the matrix in order to do correct preallocation 3655 */ 3656 3657 /* first get start and end of "diagonal" columns */ 3658 PetscCall(ISGetLocalSize(iscol, &csize)); 3659 if (csize == PETSC_DECIDE) { 3660 PetscCall(ISGetSize(isrow, &mglobal)); 3661 if (mglobal == Ncols) { /* square matrix */ 3662 nlocal = m; 3663 } else { 3664 nlocal = Ncols / size + ((Ncols % size) > rank); 3665 } 3666 } else { 3667 nlocal = csize; 3668 } 3669 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3670 rstart = rend - nlocal; 3671 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column 
sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3672 3673 /* next, compute all the lengths */ 3674 jj = aij->j; 3675 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3676 olens = dlens + m; 3677 for (i = 0; i < m; i++) { 3678 jend = ii[i + 1] - ii[i]; 3679 olen = 0; 3680 dlen = 0; 3681 for (j = 0; j < jend; j++) { 3682 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3683 else dlen++; 3684 jj++; 3685 } 3686 olens[i] = olen; 3687 dlens[i] = dlen; 3688 } 3689 3690 PetscCall(ISGetBlockSize(isrow, &bs)); 3691 PetscCall(ISGetBlockSize(iscol, &cbs)); 3692 3693 PetscCall(MatCreate(comm, &M)); 3694 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3695 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3696 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3697 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3698 PetscCall(PetscFree(dlens)); 3699 3700 } else { /* call == MAT_REUSE_MATRIX */ 3701 M = *newmat; 3702 PetscCall(MatGetLocalSize(M, &i, NULL)); 3703 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3704 PetscCall(MatZeroEntries(M)); 3705 /* 3706 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3707 rather than the slower MatSetValues(). 3708 */ 3709 M->was_assembled = PETSC_TRUE; 3710 M->assembled = PETSC_FALSE; 3711 } 3712 3713 /* (5) Set values of Msub to *newmat */ 3714 PetscCall(PetscMalloc1(count, &colsub)); 3715 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3716 3717 jj = aij->j; 3718 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3719 for (i = 0; i < m; i++) { 3720 row = rstart + i; 3721 nz = ii[i + 1] - ii[i]; 3722 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3723 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3724 jj += nz; 3725 aa += nz; 3726 } 3727 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3728 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3729 3730 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3731 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3732 3733 PetscCall(PetscFree(colsub)); 3734 3735 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3736 if (call == MAT_INITIAL_MATRIX) { 3737 *newmat = M; 3738 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub)); 3739 PetscCall(MatDestroy(&Msub)); 3740 3741 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub)); 3742 PetscCall(ISDestroy(&iscol_sub)); 3743 3744 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap)); 3745 PetscCall(ISDestroy(&iscmap)); 3746 3747 if (iscol_local) { 3748 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local)); 3749 PetscCall(ISDestroy(&iscol_local)); 3750 } 3751 } 3752 PetscFunctionReturn(PETSC_SUCCESS); 3753 } 3754 3755 /* 3756 Not great since it makes two copies of the submatrix, first an SeqAIJ 3757 in local and then by concatenating the local matrices the end result. 3758 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3759 3760 This requires a sequential iscol with all indices. 
3761 */ 3762 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3763 { 3764 PetscMPIInt rank, size; 3765 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3766 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3767 Mat M, Mreuse; 3768 MatScalar *aa, *vwork; 3769 MPI_Comm comm; 3770 Mat_SeqAIJ *aij; 3771 PetscBool colflag, allcolumns = PETSC_FALSE; 3772 3773 PetscFunctionBegin; 3774 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3775 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3776 PetscCallMPI(MPI_Comm_size(comm, &size)); 3777 3778 /* Check for special case: each processor gets entire matrix columns */ 3779 PetscCall(ISIdentity(iscol, &colflag)); 3780 PetscCall(ISGetLocalSize(iscol, &n)); 3781 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3782 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3783 3784 if (call == MAT_REUSE_MATRIX) { 3785 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3786 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3787 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3788 } else { 3789 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3790 } 3791 3792 /* 3793 m - number of local rows 3794 n - number of columns (same on all processors) 3795 rstart - first row in new global matrix generated 3796 */ 3797 PetscCall(MatGetSize(Mreuse, &m, &n)); 3798 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3799 if (call == MAT_INITIAL_MATRIX) { 3800 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3801 ii = aij->i; 3802 jj = aij->j; 3803 3804 /* 3805 Determine the number of non-zeros in the diagonal and off-diagonal 3806 portions of the matrix in order to do correct preallocation 3807 */ 3808 3809 /* first get start and end of "diagonal" columns */ 3810 if (csize == PETSC_DECIDE) { 3811 PetscCall(ISGetSize(isrow, &mglobal)); 3812 if (mglobal == n) { /* square matrix */ 3813 nlocal = m; 3814 } else { 3815 nlocal = n / size + ((n % size) > rank); 3816 } 3817 } else { 3818 nlocal = csize; 3819 } 3820 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3821 rstart = rend - nlocal; 3822 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3823 3824 /* next, compute all the lengths */ 3825 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3826 olens = dlens + m; 3827 for (i = 0; i < m; i++) { 3828 jend = ii[i + 1] - ii[i]; 3829 olen = 0; 3830 dlen = 0; 3831 for (j = 0; j < jend; j++) { 3832 if (*jj < rstart || *jj >= rend) olen++; 3833 else dlen++; 3834 jj++; 3835 } 3836 olens[i] = olen; 3837 dlens[i] = dlen; 3838 } 3839 PetscCall(MatCreate(comm, &M)); 3840 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3841 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3842 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3843 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3844 PetscCall(PetscFree(dlens)); 3845 } else { 3846 PetscInt ml, nl; 3847 3848 M = *newmat; 3849 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3850 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3851 PetscCall(MatZeroEntries(M)); 3852 /* 3853 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3854 rather than the slower MatSetValues(). 3855 */ 3856 M->was_assembled = PETSC_TRUE; 3857 M->assembled = PETSC_FALSE; 3858 } 3859 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3860 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3861 ii = aij->i; 3862 jj = aij->j; 3863 3864 /* trigger copy to CPU if needed */ 3865 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3866 for (i = 0; i < m; i++) { 3867 row = rstart + i; 3868 nz = ii[i + 1] - ii[i]; 3869 cwork = jj; 3870 jj += nz; 3871 vwork = aa; 3872 aa += nz; 3873 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3874 } 3875 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3876 3877 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3878 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3879 *newmat = M; 3880 3881 /* save submatrix used in processor for next request */ 3882 if (call == MAT_INITIAL_MATRIX) { 3883 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3884 PetscCall(MatDestroy(&Mreuse)); 3885 } 3886 PetscFunctionReturn(PETSC_SUCCESS); 3887 } 3888 3889 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3890 { 3891 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3892 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3893 const PetscInt *JJ; 3894 PetscBool nooffprocentries; 3895 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3896 3897 PetscFunctionBegin; 3898 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3899 3900 PetscCall(PetscLayoutSetUp(B->rmap)); 3901 PetscCall(PetscLayoutSetUp(B->cmap)); 3902 m = B->rmap->n; 3903 cstart = B->cmap->rstart; 3904 cend = B->cmap->rend; 3905 rstart = B->rmap->rstart; 3906 3907 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3908 3909 if (PetscDefined(USE_DEBUG)) { 3910 for (i = 0; i < m; i++) { 3911 nnz = Ii[i + 1] - Ii[i]; 3912 JJ = J + Ii[i]; 3913 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3914 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3915 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3916 } 3917 } 3918 3919 for (i = 0; i < m; i++) { 3920 nnz = Ii[i + 1] - Ii[i]; 3921 JJ = J + Ii[i]; 3922 nnz_max = PetscMax(nnz_max, nnz); 3923 d = 0; 3924 for (j = 0; j < nnz; j++) { 3925 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3926 } 3927 d_nnz[i] = d; 3928 o_nnz[i] = nnz - d; 3929 } 3930 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3931 PetscCall(PetscFree2(d_nnz, o_nnz)); 3932 3933 for (i = 0; i < m; i++) { 3934 ii = i + rstart; 3935 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3936 } 3937 nooffprocentries = B->nooffprocentries; 3938 B->nooffprocentries = PETSC_TRUE; 3939 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3940 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3941 B->nooffprocentries = nooffprocentries; 3942 3943 /* count number of entries below block diagonal */ 3944 PetscCall(PetscFree(Aij->ld)); 3945 PetscCall(PetscCalloc1(m, &ld)); 3946 Aij->ld = ld; 3947 for (i = 0; i < m; i++) { 3948 nnz = Ii[i + 1] - Ii[i]; 3949 j = 0; 3950 while (j < nnz && J[j] < cstart) j++; 3951 ld[i] = j; 3952 J += nnz; 3953 } 3954 3955 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3956 PetscFunctionReturn(PETSC_SUCCESS); 3957 } 3958 3959 /*@ 3960 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3961 (the default parallel PETSc format). 3962 3963 Collective 3964 3965 Input Parameters: 3966 + B - the matrix 3967 . i - the indices into j for the start of each local row (starts with zero) 3968 . j - the column indices for each local row (starts with zero) 3969 - v - optional values in the matrix 3970 3971 Level: developer 3972 3973 Notes: 3974 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3975 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3976 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3977 3978 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3979 3980 The format which is used for the sparse matrix input, is equivalent to a 3981 row-major ordering.. i.e for the following matrix, the input data expected is 3982 as shown 3983 3984 .vb 3985 1 0 0 3986 2 0 3 P0 3987 ------- 3988 4 5 6 P1 3989 3990 Process0 [P0] rows_owned=[0,1] 3991 i = {0,1,3} [size = nrow+1 = 2+1] 3992 j = {0,0,2} [size = 3] 3993 v = {1,2,3} [size = 3] 3994 3995 Process1 [P1] rows_owned=[2] 3996 i = {0,3} [size = nrow+1 = 1+1] 3997 j = {0,1,2} [size = 3] 3998 v = {4,5,6} [size = 3] 3999 .ve 4000 4001 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 4002 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 4003 @*/ 4004 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4005 { 4006 PetscFunctionBegin; 4007 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4008 PetscFunctionReturn(PETSC_SUCCESS); 4009 } 4010 4011 /*@C 4012 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4013 (the default parallel PETSc format). For good matrix assembly performance 4014 the user should preallocate the matrix storage by setting the parameters 4015 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4016 performance can be increased by more than a factor of 50. 4017 4018 Collective 4019 4020 Input Parameters: 4021 + B - the matrix 4022 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4023 (same value is used for all local rows) 4024 . d_nnz - array containing the number of nonzeros in the various rows of the 4025 DIAGONAL portion of the local submatrix (possibly different for each row) 4026 or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure. 
4027 The size of this array is equal to the number of local rows, i.e. 'm'. 4028 For matrices that will be factored, you must leave room for (and set) 4029 the diagonal entry even if it is zero. 4030 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4031 submatrix (same value is used for all local rows). 4032 - o_nnz - array containing the number of nonzeros in the various rows of the 4033 OFF-DIAGONAL portion of the local submatrix (possibly different for 4034 each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero 4035 structure. The size of this array is equal to the number 4036 of local rows, i.e. 'm'. 4037 4038 Usage: 4039 Consider the following 8x8 matrix with 34 non-zero values, that is 4040 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4041 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4042 as follows 4043 4044 .vb 4045 1 2 0 | 0 3 0 | 0 4 4046 Proc0 0 5 6 | 7 0 0 | 8 0 4047 9 0 10 | 11 0 0 | 12 0 4048 ------------------------------------- 4049 13 0 14 | 15 16 17 | 0 0 4050 Proc1 0 18 0 | 19 20 21 | 0 0 4051 0 0 0 | 22 23 0 | 24 0 4052 ------------------------------------- 4053 Proc2 25 26 27 | 0 0 28 | 29 0 4054 30 0 0 | 31 32 33 | 0 34 4055 .ve 4056 4057 This can be represented as a collection of submatrices as 4058 .vb 4059 A B C 4060 D E F 4061 G H I 4062 .ve 4063 4064 Where the submatrices A,B,C are owned by proc0, D,E,F are 4065 owned by proc1, G,H,I are owned by proc2. 4066 4067 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4068 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4069 The 'M','N' parameters are 8,8, and have the same values on all procs. 4070 4071 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4072 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4073 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4074 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4075 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4076 matrix, and [DF] as another `MATSEQAIJ` matrix. 4077 4078 When d_nz, o_nz parameters are specified, d_nz storage elements are 4079 allocated for every row of the local diagonal submatrix, and o_nz 4080 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4081 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4082 row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 4083 In this case, the values of d_nz,o_nz are 4084 .vb 4085 proc0 d_nz = 2, o_nz = 2 4086 proc1 d_nz = 3, o_nz = 2 4087 proc2 d_nz = 1, o_nz = 4 4088 .ve 4089 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4090 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4091 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4092 34 values. 4093 4094 When d_nnz, o_nnz parameters are specified, the storage is specified 4095 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4096 In the above case the values for d_nnz,o_nnz are 4097 .vb 4098 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4099 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4100 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4101 .ve 4102 Here the space allocated is the sum of all the above values, i.e. 34, and 4103 hence the preallocation is perfect.
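   For the example above, the corresponding preallocation calls might look like the
   following sketch (variable names are illustrative; each rank passes only its own
   local arrays, shown here for proc1):
.vb
      Mat      A;
      PetscInt m        = 3;             // 3 on proc0 and proc1, 2 on proc2
      PetscInt d_nnz[3] = {3, 3, 2};     // proc1 values from the table above
      PetscInt o_nnz[3] = {2, 1, 1};

      MatCreate(PETSC_COMM_WORLD, &A);
      MatSetSizes(A, m, m, 8, 8);
      MatSetType(A, MATMPIAIJ);
      MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz);
.ve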
4104 4105 Level: intermediate 4106 4107 Notes: 4108 If the *_nnz parameter is given then the *_nz parameter is ignored. 4109 4110 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4111 storage. The stored row and column indices begin with zero. 4112 See [Sparse Matrices](sec_matsparse) for details. 4113 4114 The parallel matrix is partitioned such that the first m0 rows belong to 4115 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4116 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 4117 4118 The DIAGONAL portion of the local submatrix of a processor can be defined 4119 as the submatrix which is obtained by extracting the part corresponding to 4120 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4121 first row that belongs to the processor, r2 is the last row belonging to 4122 this processor, and c1-c2 is the range of indices of the local part of a 4123 vector suitable for applying the matrix to. This is an m x n matrix. In the 4124 common case of a square matrix, the row and column ranges are the same and 4125 the DIAGONAL part is also square. The remaining portion of the local 4126 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 4127 4128 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4129 4130 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4131 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded. 4132 You can also run with the option `-info` and look for messages with the string 4133 malloc in them to see if additional memory allocation was needed. 4134 4135 .seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4136 `MatGetInfo()`, `PetscSplitOwnership()` 4137 @*/ 4138 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4139 { 4140 PetscFunctionBegin; 4141 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4142 PetscValidType(B, 1); 4143 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4144 PetscFunctionReturn(PETSC_SUCCESS); 4145 } 4146 4147 /*@ 4148 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4149 CSR format. 4150 4151 Collective 4152 4153 Input Parameters: 4154 + comm - MPI communicator 4155 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4156 . n - This value should be the same as the local size used in creating the 4157 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4158 calculated if N is given) For square matrices n is almost always m. 4159 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4160 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4161 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4162 . j - column indices 4163 - a - optional matrix values 4164 4165 Output Parameter: 4166 . mat - the matrix 4167 4168 Level: intermediate 4169 4170 Notes: 4171 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4172 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4173 called this routine.
Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4174 4175 The i and j indices are 0 based, and the i indices are offsets into the local j array. 4176 4177 The format which is used for the sparse matrix input is equivalent to a 4178 row-major ordering, i.e., for the following matrix, the input data expected is 4179 as shown below. 4180 4181 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`. 4182 4183 $ 1 0 0 4184 $ 2 0 3 P0 4185 $ ------- 4186 $ 4 5 6 P1 4187 $ 4188 $ Process0 [P0] rows_owned=[0,1] 4189 $ i = {0,1,3} [size = nrow+1 = 2+1] 4190 $ j = {0,0,2} [size = 3] 4191 $ v = {1,2,3} [size = 3] 4192 $ 4193 $ Process1 [P1] rows_owned=[2] 4194 $ i = {0,3} [size = nrow+1 = 1+1] 4195 $ j = {0,1,2} [size = 3] 4196 $ v = {4,5,6} [size = 3] 4197 4198 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4199 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4200 @*/ 4201 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4202 { 4203 PetscFunctionBegin; 4204 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4205 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4206 PetscCall(MatCreate(comm, mat)); 4207 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4208 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4209 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4210 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4211 PetscFunctionReturn(PETSC_SUCCESS); 4212 } 4213 4214 /*@ 4215 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4216 CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed to `MatCreateMPIAIJWithArrays()` 4217 4218 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4219 4220 Collective 4221 4222 Input Parameters: 4223 + mat - the matrix 4224 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4225 . n - This value should be the same as the local size used in creating the 4226 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4227 calculated if N is given) For square matrices n is almost always m. 4228 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4229 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4230 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4231 .
J - column indices 4232 - v - matrix values 4233 4234 Level: intermediate 4235 4236 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4237 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4238 @*/ 4239 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4240 { 4241 PetscInt nnz, i; 4242 PetscBool nooffprocentries; 4243 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4244 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4245 PetscScalar *ad, *ao; 4246 PetscInt ldi, Iii, md; 4247 const PetscInt *Adi = Ad->i; 4248 PetscInt *ld = Aij->ld; 4249 4250 PetscFunctionBegin; 4251 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4252 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4253 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4254 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4255 4256 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4257 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4258 4259 for (i = 0; i < m; i++) { 4260 nnz = Ii[i + 1] - Ii[i]; 4261 Iii = Ii[i]; 4262 ldi = ld[i]; 4263 md = Adi[i + 1] - Adi[i]; 4264 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4265 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4266 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4267 ad += md; 4268 ao += nnz - md; 4269 } 4270 nooffprocentries = mat->nooffprocentries; 4271 mat->nooffprocentries = PETSC_TRUE; 4272 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4273 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4274 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4275 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4276 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4277 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4278 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4279 mat->nooffprocentries = nooffprocentries; 4280 PetscFunctionReturn(PETSC_SUCCESS); 4281 } 4282 4283 /*@ 4284 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4285 4286 Collective 4287 4288 Input Parameters: 4289 + mat - the matrix 4290 - v - matrix values, stored by row 4291 4292 Level: intermediate 4293 4294 Note: 4295 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4296 4297 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4298 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4299 @*/ 4300 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4301 { 4302 PetscInt nnz, i, m; 4303 PetscBool nooffprocentries; 4304 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4307 PetscScalar *ad, *ao; 4308 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4309 PetscInt ldi, Iii, md; 4310 PetscInt *ld = Aij->ld; 
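  /* For each local row, the input array v holds that row's nonzeros in global column order:
     the first ld[i] values lie to the left of the diagonal block and belong to the off-diagonal
     part Aij->B, the next Adi[i+1]-Adi[i] values belong to the diagonal part Aij->A, and the
     remaining values belong to Aij->B again; the loop below splits v accordingly. */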
4311 4312 PetscFunctionBegin; 4313 m = mat->rmap->n; 4314 4315 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4316 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4317 Iii = 0; 4318 for (i = 0; i < m; i++) { 4319 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4320 ldi = ld[i]; 4321 md = Adi[i + 1] - Adi[i]; 4322 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4323 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4324 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4325 ad += md; 4326 ao += nnz - md; 4327 Iii += nnz; 4328 } 4329 nooffprocentries = mat->nooffprocentries; 4330 mat->nooffprocentries = PETSC_TRUE; 4331 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4332 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4333 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4334 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4335 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4336 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4337 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4338 mat->nooffprocentries = nooffprocentries; 4339 PetscFunctionReturn(PETSC_SUCCESS); 4340 } 4341 4342 /*@C 4343 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4344 (the default parallel PETSc format). For good matrix assembly performance 4345 the user should preallocate the matrix storage by setting the parameters 4346 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4347 performance can be increased by more than a factor of 50. 4348 4349 Collective 4350 4351 Input Parameters: 4352 + comm - MPI communicator 4353 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4354 This value should be the same as the local size used in creating the 4355 y vector for the matrix-vector product y = Ax. 4356 . n - This value should be the same as the local size used in creating the 4357 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4358 calculated if N is given) For square matrices n is almost always m. 4359 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4360 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4361 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4362 (same value is used for all local rows) 4363 . d_nnz - array containing the number of nonzeros in the various rows of the 4364 DIAGONAL portion of the local submatrix (possibly different for each row) 4365 or NULL, if d_nz is used to specify the nonzero structure. 4366 The size of this array is equal to the number of local rows, i.e 'm'. 4367 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4368 submatrix (same value is used for all local rows). 4369 - o_nnz - array containing the number of nonzeros in the various rows of the 4370 OFF-DIAGONAL portion of the local submatrix (possibly different for 4371 each row) or NULL, if o_nz is used to specify the nonzero 4372 structure. The size of this array is equal to the number 4373 of local rows, i.e 'm'. 4374 4375 Output Parameter: 4376 . A - the matrix 4377 4378 Options Database Keys: 4379 + -mat_no_inode - Do not use inodes 4380 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4381 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4382 See viewer types in manual of `MatView()`. 
Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix. 4383 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4384 4385 Notes: 4386 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4387 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4388 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4389 4390 If the *_nnz parameter is given then the *_nz parameter is ignored. 4391 4392 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4393 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4394 storage requirements for this matrix. 4395 4396 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4397 processor then it must be used on all processors that share the object for 4398 that argument. 4399 4400 The user MUST specify either the local or global matrix dimensions 4401 (possibly both). 4402 4403 The parallel matrix is partitioned across processors such that the 4404 first m0 rows belong to process 0, the next m1 rows belong to 4405 process 1, the next m2 rows belong to process 2, etc., where 4406 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores 4407 values corresponding to an [m x N] submatrix. 4408 4409 The columns are logically partitioned with the n0 columns belonging 4410 to the 0th partition, the next n1 columns belonging to the next 4411 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4412 4413 The DIAGONAL portion of the local submatrix on any given processor 4414 is the submatrix corresponding to the rows and columns m,n 4415 corresponding to the given processor, i.e., the diagonal matrix on 4416 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4417 etc. The remaining portion of the local submatrix [m x (N-n)] 4418 constitutes the OFF-DIAGONAL portion. The example below better 4419 illustrates this concept. 4420 4421 For a square global matrix we define each processor's diagonal portion 4422 to be its local rows and the corresponding columns (a square submatrix); 4423 each processor's off-diagonal portion encompasses the remainder of the 4424 local matrix (a rectangular submatrix). 4425 4426 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4427 4428 When calling this routine with a single process communicator, a matrix of 4429 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4430 type of communicator, use the construction mechanism 4431 .vb 4432 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4433 .ve 4434 4440 By default, this format uses inodes (identical nodes) when possible. 4441 We search for consecutive rows with the same nonzero structure, thereby 4442 reusing matrix information to achieve increased efficiency. 4443 4444 Usage: 4445 Consider the following 8x8 matrix with 34 non-zero values, that is 4446 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4447 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4448 as follows 4449 4450 .vb 4451 1 2 0 | 0 3 0 | 0 4 4452 Proc0 0 5 6 | 7 0 0 | 8 0 4453 9 0 10 | 11 0 0 | 12 0 4454 ------------------------------------- 4455 13 0 14 | 15 16 17 | 0 0 4456 Proc1 0 18 0 | 19 20 21 | 0 0 4457 0 0 0 | 22 23 0 | 24 0 4458 ------------------------------------- 4459 Proc2 25 26 27 | 0 0 28 | 29 0 4460 30 0 0 | 31 32 33 | 0 34 4461 .ve 4462 4463 This can be represented as a collection of submatrices as 4464 4465 .vb 4466 A B C 4467 D E F 4468 G H I 4469 .ve 4470 4471 Where the submatrices A,B,C are owned by proc0, D,E,F are 4472 owned by proc1, G,H,I are owned by proc2. 4473 4474 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4475 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4476 The 'M','N' parameters are 8,8, and have the same values on all procs. 4477 4478 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4479 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4480 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4481 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4482 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4483 matrix, and [DF] as another `MATSEQAIJ` matrix. 4484 4485 When d_nz, o_nz parameters are specified, d_nz storage elements are 4486 allocated for every row of the local diagonal submatrix, and o_nz 4487 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4488 One way to choose d_nz and o_nz is to use the max nonzeros per local 4489 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4490 In this case, the values of d_nz,o_nz are 4491 .vb 4492 proc0 d_nz = 2, o_nz = 2 4493 proc1 d_nz = 3, o_nz = 2 4494 proc2 d_nz = 1, o_nz = 4 4495 .ve 4496 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4497 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4498 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4499 34 values. 4500 4501 When d_nnz, o_nnz parameters are specified, the storage is specified 4502 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4503 In the above case the values for d_nnz,o_nnz are 4504 .vb 4505 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4506 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4507 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4508 .ve 4509 Here the space allocated is the sum of all the above values, i.e. 34, and 4510 hence pre-allocation is perfect.
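   For instance, a minimal sketch (sizes and arrays taken from the 8x8 example above) of the call proc0 would make to create the matrix with exact per-row preallocation; value insertion with `MatSetValues()`, assembly, and error checking are omitted
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2, 2, 2};  // nonzeros per local row of the DIAGONAL submatrix
     PetscInt o_nnz[3] = {2, 2, 2};  // nonzeros per local row of the OFF-DIAGONAL submatrix

     MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
     // proc1 and proc2 make the same call with their own m, n, d_nnz, and o_nnz
.ve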
4511 4512 Level: intermediate 4513 4514 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4515 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4516 @*/ 4517 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4518 { 4519 PetscMPIInt size; 4520 4521 PetscFunctionBegin; 4522 PetscCall(MatCreate(comm, A)); 4523 PetscCall(MatSetSizes(*A, m, n, M, N)); 4524 PetscCallMPI(MPI_Comm_size(comm, &size)); 4525 if (size > 1) { 4526 PetscCall(MatSetType(*A, MATMPIAIJ)); 4527 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4528 } else { 4529 PetscCall(MatSetType(*A, MATSEQAIJ)); 4530 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4531 } 4532 PetscFunctionReturn(PETSC_SUCCESS); 4533 } 4534 4535 /*MC 4536 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4537 4538 Synopsis: 4539 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr) 4540 4541 Not Collective 4542 4543 Input Parameter: 4544 . A - the `MATMPIAIJ` matrix 4545 4546 Output Parameters: 4547 + Ad - the diagonal portion of the matrix 4548 . Ao - the off-diagonal portion of the matrix 4549 . colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4550 - ierr - error code 4551 4552 Level: advanced 4553 4554 Note: 4555 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4556 4557 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4558 M*/ 4559 4560 /*MC 4561 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4562 4563 Synopsis: 4564 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr) 4565 4566 Not Collective 4567 4568 Input Parameters: 4569 + A - the `MATMPIAIJ` matrix 4570 . Ad - the diagonal portion of the matrix 4571 . Ao - the off-diagonal portion of the matrix 4572 . colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4573 - ierr - error code 4574 4575 Level: advanced 4576 4577 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4578 M*/ 4579 4580 /*@C 4581 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4582 4583 Not Collective 4584 4585 Input Parameter: 4586 . A - The `MATMPIAIJ` matrix 4587 4588 Output Parameters: 4589 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4590 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4591 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4592 4593 Level: intermediate 4594 4595 Note: 4596 The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4597 in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4598 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4599 local column numbers to global column numbers in the original matrix.
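   For instance, a sketch of retrieving the blocks and translating a local column index j of Ao back to a global column of the parallel matrix (variable names are illustrative only)
.vb
     Mat             Ad, Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
     // local column j of Ao corresponds to global column colmap[j] of A
.ve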
4600 4601 Fortran Note: 4602 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4603 4604 .seealso: `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4605 @*/ 4606 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4607 { 4608 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4609 PetscBool flg; 4610 4611 PetscFunctionBegin; 4612 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4613 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4614 if (Ad) *Ad = a->A; 4615 if (Ao) *Ao = a->B; 4616 if (colmap) *colmap = a->garray; 4617 PetscFunctionReturn(PETSC_SUCCESS); 4618 } 4619 4620 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4621 { 4622 PetscInt m, N, i, rstart, nnz, Ii; 4623 PetscInt *indx; 4624 PetscScalar *values; 4625 MatType rootType; 4626 4627 PetscFunctionBegin; 4628 PetscCall(MatGetSize(inmat, &m, &N)); 4629 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4630 PetscInt *dnz, *onz, sum, bs, cbs; 4631 4632 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4633 /* Check sum(n) = N */ 4634 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4635 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4636 4637 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4638 rstart -= m; 4639 4640 MatPreallocateBegin(comm, m, n, dnz, onz); 4641 for (i = 0; i < m; i++) { 4642 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4643 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4644 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4645 } 4646 4647 PetscCall(MatCreate(comm, outmat)); 4648 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4649 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4650 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4651 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4652 PetscCall(MatSetType(*outmat, rootType)); 4653 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4654 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4655 MatPreallocateEnd(dnz, onz); 4656 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4657 } 4658 4659 /* numeric phase */ 4660 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4661 for (i = 0; i < m; i++) { 4662 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4663 Ii = i + rstart; 4664 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4665 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4666 } 4667 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4668 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4669 PetscFunctionReturn(PETSC_SUCCESS); 4670 } 4671 4672 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4673 { 4674 PetscMPIInt rank; 4675 PetscInt m, N, i, rstart, nnz; 4676 size_t len; 4677 const PetscInt *indx; 4678 PetscViewer out; 4679 char *name; 4680 Mat B; 4681 const PetscScalar *values; 4682 4683 PetscFunctionBegin; 4684 PetscCall(MatGetLocalSize(A, &m, NULL)); 4685 PetscCall(MatGetSize(A, NULL, &N)); 4686 /* Should this be the type of the diagonal block of 
A? */ 4687 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4688 PetscCall(MatSetSizes(B, m, N, m, N)); 4689 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4690 PetscCall(MatSetType(B, MATSEQAIJ)); 4691 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4692 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4693 for (i = 0; i < m; i++) { 4694 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4695 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4696 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4697 } 4698 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4699 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4700 4701 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4702 PetscCall(PetscStrlen(outfile, &len)); 4703 PetscCall(PetscMalloc1(len + 6, &name)); 4704 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4705 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4706 PetscCall(PetscFree(name)); 4707 PetscCall(MatView(B, out)); 4708 PetscCall(PetscViewerDestroy(&out)); 4709 PetscCall(MatDestroy(&B)); 4710 PetscFunctionReturn(PETSC_SUCCESS); 4711 } 4712 4713 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4714 { 4715 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4716 4717 PetscFunctionBegin; 4718 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4719 PetscCall(PetscFree(merge->id_r)); 4720 PetscCall(PetscFree(merge->len_s)); 4721 PetscCall(PetscFree(merge->len_r)); 4722 PetscCall(PetscFree(merge->bi)); 4723 PetscCall(PetscFree(merge->bj)); 4724 PetscCall(PetscFree(merge->buf_ri[0])); 4725 PetscCall(PetscFree(merge->buf_ri)); 4726 PetscCall(PetscFree(merge->buf_rj[0])); 4727 PetscCall(PetscFree(merge->buf_rj)); 4728 PetscCall(PetscFree(merge->coi)); 4729 PetscCall(PetscFree(merge->coj)); 4730 PetscCall(PetscFree(merge->owners_co)); 4731 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4732 PetscCall(PetscFree(merge)); 4733 PetscFunctionReturn(PETSC_SUCCESS); 4734 } 4735 4736 #include <../src/mat/utils/freespace.h> 4737 #include <petscbt.h> 4738 4739 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4740 { 4741 MPI_Comm comm; 4742 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4743 PetscMPIInt size, rank, taga, *len_s; 4744 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4745 PetscInt proc, m; 4746 PetscInt **buf_ri, **buf_rj; 4747 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4748 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4749 MPI_Request *s_waits, *r_waits; 4750 MPI_Status *status; 4751 const MatScalar *aa, *a_a; 4752 MatScalar **abuf_r, *ba_i; 4753 Mat_Merge_SeqsToMPI *merge; 4754 PetscContainer container; 4755 4756 PetscFunctionBegin; 4757 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4758 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4759 4760 PetscCallMPI(MPI_Comm_size(comm, &size)); 4761 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4762 4763 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4764 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4765 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4766 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4767 aa = a_a; 4768 4769 bi = merge->bi; 4770 bj = merge->bj; 4771 buf_ri = merge->buf_ri; 4772 buf_rj = merge->buf_rj; 4773 4774 PetscCall(PetscMalloc1(size, &status)); 4775 owners = 
merge->rowmap->range; 4776 len_s = merge->len_s; 4777 4778 /* send and recv matrix values */ 4779 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4780 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4781 4782 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4783 for (proc = 0, k = 0; proc < size; proc++) { 4784 if (!len_s[proc]) continue; 4785 i = owners[proc]; 4786 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4787 k++; 4788 } 4789 4790 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4791 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4792 PetscCall(PetscFree(status)); 4793 4794 PetscCall(PetscFree(s_waits)); 4795 PetscCall(PetscFree(r_waits)); 4796 4797 /* insert mat values of mpimat */ 4798 PetscCall(PetscMalloc1(N, &ba_i)); 4799 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4800 4801 for (k = 0; k < merge->nrecv; k++) { 4802 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4803 nrows = *(buf_ri_k[k]); 4804 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4805 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4806 } 4807 4808 /* set values of ba */ 4809 m = merge->rowmap->n; 4810 for (i = 0; i < m; i++) { 4811 arow = owners[rank] + i; 4812 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4813 bnzi = bi[i + 1] - bi[i]; 4814 PetscCall(PetscArrayzero(ba_i, bnzi)); 4815 4816 /* add local non-zero vals of this proc's seqmat into ba */ 4817 anzi = ai[arow + 1] - ai[arow]; 4818 aj = a->j + ai[arow]; 4819 aa = a_a + ai[arow]; 4820 nextaj = 0; 4821 for (j = 0; nextaj < anzi; j++) { 4822 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4823 ba_i[j] += aa[nextaj++]; 4824 } 4825 } 4826 4827 /* add received vals into ba */ 4828 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4829 /* i-th row */ 4830 if (i == *nextrow[k]) { 4831 anzi = *(nextai[k] + 1) - *nextai[k]; 4832 aj = buf_rj[k] + *(nextai[k]); 4833 aa = abuf_r[k] + *(nextai[k]); 4834 nextaj = 0; 4835 for (j = 0; nextaj < anzi; j++) { 4836 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4837 ba_i[j] += aa[nextaj++]; 4838 } 4839 } 4840 nextrow[k]++; 4841 nextai[k]++; 4842 } 4843 } 4844 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4845 } 4846 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4847 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4848 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4849 4850 PetscCall(PetscFree(abuf_r[0])); 4851 PetscCall(PetscFree(abuf_r)); 4852 PetscCall(PetscFree(ba_i)); 4853 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4854 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4855 PetscFunctionReturn(PETSC_SUCCESS); 4856 } 4857 4858 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4859 { 4860 Mat B_mpi; 4861 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4862 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4863 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4864 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4865 PetscInt len, proc, *dnz, *onz, bs, cbs; 4866 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4867 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, 
**nextai; 4868 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4869 MPI_Status *status; 4870 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4871 PetscBT lnkbt; 4872 Mat_Merge_SeqsToMPI *merge; 4873 PetscContainer container; 4874 4875 PetscFunctionBegin; 4876 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4877 4878 /* make sure it is a PETSc comm */ 4879 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4880 PetscCallMPI(MPI_Comm_size(comm, &size)); 4881 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4882 4883 PetscCall(PetscNew(&merge)); 4884 PetscCall(PetscMalloc1(size, &status)); 4885 4886 /* determine row ownership */ 4887 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4888 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4889 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4890 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4891 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4892 PetscCall(PetscMalloc1(size, &len_si)); 4893 PetscCall(PetscMalloc1(size, &merge->len_s)); 4894 4895 m = merge->rowmap->n; 4896 owners = merge->rowmap->range; 4897 4898 /* determine the number of messages to send, their lengths */ 4899 len_s = merge->len_s; 4900 4901 len = 0; /* length of buf_si[] */ 4902 merge->nsend = 0; 4903 for (proc = 0; proc < size; proc++) { 4904 len_si[proc] = 0; 4905 if (proc == rank) { 4906 len_s[proc] = 0; 4907 } else { 4908 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4909 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4910 } 4911 if (len_s[proc]) { 4912 merge->nsend++; 4913 nrows = 0; 4914 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4915 if (ai[i + 1] > ai[i]) nrows++; 4916 } 4917 len_si[proc] = 2 * (nrows + 1); 4918 len += len_si[proc]; 4919 } 4920 } 4921 4922 /* determine the number and length of messages to receive for ij-structure */ 4923 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4924 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4925 4926 /* post the Irecv of j-structure */ 4927 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4928 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4929 4930 /* post the Isend of j-structure */ 4931 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4932 4933 for (proc = 0, k = 0; proc < size; proc++) { 4934 if (!len_s[proc]) continue; 4935 i = owners[proc]; 4936 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4937 k++; 4938 } 4939 4940 /* receives and sends of j-structure are complete */ 4941 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4942 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4943 4944 /* send and recv i-structure */ 4945 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4946 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4947 4948 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4949 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4950 for (proc = 0, k = 0; proc < size; proc++) { 4951 if (!len_s[proc]) continue; 4952 /* form outgoing message for i-structure: 4953 buf_si[0]: nrows to be sent 4954 [1:nrows]: row index (global) 4955 [nrows+1:2*nrows+1]: i-structure index 4956 */ 4957 nrows = len_si[proc] / 2 - 1; 4958 buf_si_i = buf_si + nrows + 1; 4959 buf_si[0] = nrows; 4960 
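      /* Illustrative (hypothetical) example of the message layout built below: if two nonempty rows with
         3 and 5 nonzeros respectively are sent to [proc], then len_si[proc] = 2*(2+1) = 6 and
            buf_si = [2, r0, r1, 0, 3, 8]
         where r0 and r1 are the row indices relative to owners[proc] and the last three entries are the
         running offsets into the accompanying j-structure message. */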
buf_si_i[0] = 0; 4961 nrows = 0; 4962 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4963 anzi = ai[i + 1] - ai[i]; 4964 if (anzi) { 4965 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4966 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4967 nrows++; 4968 } 4969 } 4970 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4971 k++; 4972 buf_si += len_si[proc]; 4973 } 4974 4975 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4976 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4977 4978 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4979 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4980 4981 PetscCall(PetscFree(len_si)); 4982 PetscCall(PetscFree(len_ri)); 4983 PetscCall(PetscFree(rj_waits)); 4984 PetscCall(PetscFree2(si_waits, sj_waits)); 4985 PetscCall(PetscFree(ri_waits)); 4986 PetscCall(PetscFree(buf_s)); 4987 PetscCall(PetscFree(status)); 4988 4989 /* compute a local seq matrix in each processor */ 4990 /* allocate bi array and free space for accumulating nonzero column info */ 4991 PetscCall(PetscMalloc1(m + 1, &bi)); 4992 bi[0] = 0; 4993 4994 /* create and initialize a linked list */ 4995 nlnk = N + 1; 4996 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4997 4998 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4999 len = ai[owners[rank + 1]] - ai[owners[rank]]; 5000 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 5001 5002 current_space = free_space; 5003 5004 /* determine symbolic info for each local row */ 5005 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5006 5007 for (k = 0; k < merge->nrecv; k++) { 5008 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5009 nrows = *buf_ri_k[k]; 5010 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5011 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5012 } 5013 5014 MatPreallocateBegin(comm, m, n, dnz, onz); 5015 len = 0; 5016 for (i = 0; i < m; i++) { 5017 bnzi = 0; 5018 /* add local non-zero cols of this proc's seqmat into lnk */ 5019 arow = owners[rank] + i; 5020 anzi = ai[arow + 1] - ai[arow]; 5021 aj = a->j + ai[arow]; 5022 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5023 bnzi += nlnk; 5024 /* add received col data into lnk */ 5025 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5026 if (i == *nextrow[k]) { /* i-th row */ 5027 anzi = *(nextai[k] + 1) - *nextai[k]; 5028 aj = buf_rj[k] + *nextai[k]; 5029 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5030 bnzi += nlnk; 5031 nextrow[k]++; 5032 nextai[k]++; 5033 } 5034 } 5035 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5036 5037 /* if free space is not available, make more free space */ 5038 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space)); 5039 /* copy data into free space, then initialize lnk */ 5040 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5041 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5042 5043 current_space->array += bnzi; 5044 current_space->local_used += bnzi; 5045 current_space->local_remaining -= bnzi; 5046 5047
bi[i + 1] = bi[i] + bnzi; 5048 } 5049 5050 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5051 5052 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5053 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5054 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5055 5056 /* create symbolic parallel matrix B_mpi */ 5057 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5058 PetscCall(MatCreate(comm, &B_mpi)); 5059 if (n == PETSC_DECIDE) { 5060 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5061 } else { 5062 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5063 } 5064 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5065 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5066 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5067 MatPreallocateEnd(dnz, onz); 5068 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5069 5070 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5071 B_mpi->assembled = PETSC_FALSE; 5072 merge->bi = bi; 5073 merge->bj = bj; 5074 merge->buf_ri = buf_ri; 5075 merge->buf_rj = buf_rj; 5076 merge->coi = NULL; 5077 merge->coj = NULL; 5078 merge->owners_co = NULL; 5079 5080 PetscCall(PetscCommDestroy(&comm)); 5081 5082 /* attach the supporting struct to B_mpi for reuse */ 5083 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5084 PetscCall(PetscContainerSetPointer(container, merge)); 5085 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5086 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5087 PetscCall(PetscContainerDestroy(&container)); 5088 *mpimat = B_mpi; 5089 5090 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5091 PetscFunctionReturn(PETSC_SUCCESS); 5092 } 5093 5094 /*@C 5095 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5096 matrices from each processor 5097 5098 Collective 5099 5100 Input Parameters: 5101 + comm - the communicator the parallel matrix will live on 5102 . seqmat - the input sequential matrix 5103 . m - number of local rows (or `PETSC_DECIDE`) 5104 . n - number of local columns (or `PETSC_DECIDE`) 5105 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5106 5107 Output Parameter: 5108 . mpimat - the parallel matrix generated 5109 5110 Level: advanced 5111 5112 Note: 5113 The dimensions of the sequential matrix in each processor MUST be the same. 5114 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5115 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
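   For instance, a minimal sketch of the intended call sequence (each process contributes its own seqmat of identical dimensions)
.vb
     Mat seqmat, mpimat;
     // ... each process assembles its own sequential matrix seqmat ...
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpimat);
     // after the numerical values of seqmat change (same nonzero pattern), the sum can be recomputed in place
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &mpimat);
.ve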
5116 @*/ 5117 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5118 { 5119 PetscMPIInt size; 5120 5121 PetscFunctionBegin; 5122 PetscCallMPI(MPI_Comm_size(comm, &size)); 5123 if (size == 1) { 5124 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5125 if (scall == MAT_INITIAL_MATRIX) { 5126 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5127 } else { 5128 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5129 } 5130 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5131 PetscFunctionReturn(PETSC_SUCCESS); 5132 } 5133 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5134 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5135 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5136 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5137 PetscFunctionReturn(PETSC_SUCCESS); 5138 } 5139 5140 /*@ 5141 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5142 mlocal rows and n columns, where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5143 with `MatGetSize()` 5144 5145 Not Collective 5146 5147 Input Parameter: 5148 . A - the matrix 5149 5150 5151 Output Parameter: 5152 . A_loc - the local sequential matrix generated 5153 5154 Level: developer 5155 5156 Notes: 5157 In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix. 5158 5159 Destroy the matrix with `MatDestroy()` 5160 5161 .seealso: `MatMPIAIJGetLocalMat()` 5162 @*/ 5163 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5164 { 5165 PetscBool mpi; 5166 5167 PetscFunctionBegin; 5168 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5169 if (mpi) { 5170 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5171 } else { 5172 *A_loc = A; 5173 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5174 } 5175 PetscFunctionReturn(PETSC_SUCCESS); 5176 } 5177 5178 /*@ 5179 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5180 mlocal rows and n columns, where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5181 with `MatGetSize()` 5182 5183 Not Collective 5184 5185 Input Parameters: 5186 + A - the matrix 5187 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5188 5189 Output Parameter: 5190 . A_loc - the local sequential matrix generated 5191 5192 Level: developer 5193 5194 Notes: 5195 In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix. 5196 5197 When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A. 5198 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called. 5199 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5200 modify the values of the returned A_loc.
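   For instance, a sketch of the reuse pattern described above
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
     // ... the values of A change, but its nonzero pattern does not ...
     MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);
     MatDestroy(&A_loc);
.ve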
5201 5202 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5203 @*/ 5204 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5205 { 5206 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5207 Mat_SeqAIJ *mat, *a, *b; 5208 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5209 const PetscScalar *aa, *ba, *aav, *bav; 5210 PetscScalar *ca, *cam; 5211 PetscMPIInt size; 5212 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5213 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5214 PetscBool match; 5215 5216 PetscFunctionBegin; 5217 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5218 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5219 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5220 if (size == 1) { 5221 if (scall == MAT_INITIAL_MATRIX) { 5222 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5223 *A_loc = mpimat->A; 5224 } else if (scall == MAT_REUSE_MATRIX) { 5225 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5226 } 5227 PetscFunctionReturn(PETSC_SUCCESS); 5228 } 5229 5230 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5231 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5232 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5233 ai = a->i; 5234 aj = a->j; 5235 bi = b->i; 5236 bj = b->j; 5237 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5238 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5239 aa = aav; 5240 ba = bav; 5241 if (scall == MAT_INITIAL_MATRIX) { 5242 PetscCall(PetscMalloc1(1 + am, &ci)); 5243 ci[0] = 0; 5244 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5245 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5246 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5247 k = 0; 5248 for (i = 0; i < am; i++) { 5249 ncols_o = bi[i + 1] - bi[i]; 5250 ncols_d = ai[i + 1] - ai[i]; 5251 /* off-diagonal portion of A */ 5252 for (jo = 0; jo < ncols_o; jo++) { 5253 col = cmap[*bj]; 5254 if (col >= cstart) break; 5255 cj[k] = col; 5256 bj++; 5257 ca[k++] = *ba++; 5258 } 5259 /* diagonal portion of A */ 5260 for (j = 0; j < ncols_d; j++) { 5261 cj[k] = cstart + *aj++; 5262 ca[k++] = *aa++; 5263 } 5264 /* off-diagonal portion of A */ 5265 for (j = jo; j < ncols_o; j++) { 5266 cj[k] = cmap[*bj++]; 5267 ca[k++] = *ba++; 5268 } 5269 } 5270 /* put together the new matrix */ 5271 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5272 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5273 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5274 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5275 mat->free_a = PETSC_TRUE; 5276 mat->free_ij = PETSC_TRUE; 5277 mat->nonew = 0; 5278 } else if (scall == MAT_REUSE_MATRIX) { 5279 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5280 ci = mat->i; 5281 cj = mat->j; 5282 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5283 for (i = 0; i < am; i++) { 5284 /* off-diagonal portion of A */ 5285 ncols_o = bi[i + 1] - bi[i]; 5286 for (jo = 0; jo < ncols_o; jo++) { 5287 col = cmap[*bj]; 5288 if (col >= cstart) break; 5289 *cam++ = *ba++; 5290 bj++; 5291 } 5292 /* diagonal portion of A */ 5293 ncols_d = ai[i + 1] - ai[i]; 5294 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5295 /* off-diagonal portion of A */ 5296 for (j = jo; j < ncols_o; j++) { 5297 *cam++ = *ba++; 5298 bj++; 5299 } 5300 } 5301 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5302 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5303 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5304 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5305 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5306 PetscFunctionReturn(PETSC_SUCCESS); 5307 } 5308 5309 /*@ 5310 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5311 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5312 5313 Not Collective 5314 5315 Input Parameters: 5316 + A - the matrix 5317 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5318 5319 Output Parameters: 5320 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5321 - A_loc - the local sequential matrix generated 5322 5323 Level: developer 5324 5325 Note: 5326 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering) 5327 5328 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5329 @*/ 5330 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5331 { 5332 Mat Ao, Ad; 5333 const PetscInt *cmap; 5334 PetscMPIInt size; 5335 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5336 5337 PetscFunctionBegin; 5338 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5339 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5340 if (size == 1) { 5341 if (scall == MAT_INITIAL_MATRIX) { 5342 PetscCall(PetscObjectReference((PetscObject)Ad)); 5343 *A_loc = Ad; 5344 } else if (scall == MAT_REUSE_MATRIX) { 5345 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5346 } 5347 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5348 PetscFunctionReturn(PETSC_SUCCESS); 5349 } 5350 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5351 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5352 if (f) { 5353 PetscCall((*f)(A, scall, glob, A_loc)); 5354 } else { 5355 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5356 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5357 Mat_SeqAIJ *c; 5358 PetscInt *ai = a->i, *aj = a->j; 5359 PetscInt *bi = b->i, *bj = b->j; 5360 PetscInt *ci, *cj; 5361 const PetscScalar *aa, *ba; 5362 PetscScalar *ca; 5363 PetscInt i, j, am, dn, on; 5364 5365 PetscCall(MatGetLocalSize(Ad, &am, 
&dn)); 5366 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5367 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5368 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5369 if (scall == MAT_INITIAL_MATRIX) { 5370 PetscInt k; 5371 PetscCall(PetscMalloc1(1 + am, &ci)); 5372 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5373 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5374 ci[0] = 0; 5375 for (i = 0, k = 0; i < am; i++) { 5376 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5377 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5378 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5379 /* diagonal portion of A */ 5380 for (j = 0; j < ncols_d; j++, k++) { 5381 cj[k] = *aj++; 5382 ca[k] = *aa++; 5383 } 5384 /* off-diagonal portion of A */ 5385 for (j = 0; j < ncols_o; j++, k++) { 5386 cj[k] = dn + *bj++; 5387 ca[k] = *ba++; 5388 } 5389 } 5390 /* put together the new matrix */ 5391 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5392 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5393 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5394 c = (Mat_SeqAIJ *)(*A_loc)->data; 5395 c->free_a = PETSC_TRUE; 5396 c->free_ij = PETSC_TRUE; 5397 c->nonew = 0; 5398 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5399 } else if (scall == MAT_REUSE_MATRIX) { 5400 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5401 for (i = 0; i < am; i++) { 5402 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5403 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5404 /* diagonal portion of A */ 5405 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5406 /* off-diagonal portion of A */ 5407 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5408 } 5409 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5410 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5411 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5412 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5413 if (glob) { 5414 PetscInt cst, *gidx; 5415 5416 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5417 PetscCall(PetscMalloc1(dn + on, &gidx)); 5418 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5419 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5420 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5421 } 5422 } 5423 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5424 PetscFunctionReturn(PETSC_SUCCESS); 5425 } 5426 5427 /*@C 5428 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5429 5430 Not Collective 5431 5432 Input Parameters: 5433 + A - the matrix 5434 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5435 - row, col - index sets of rows and columns to extract (or NULL) 5436 5437 Output Parameter: 5438 . 
A_loc - the local sequential matrix generated 5439 5440 Level: developer 5441 5442 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5443 @*/ 5444 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5445 { 5446 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5447 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5448 IS isrowa, iscola; 5449 Mat *aloc; 5450 PetscBool match; 5451 5452 PetscFunctionBegin; 5453 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5454 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5455 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5456 if (!row) { 5457 start = A->rmap->rstart; 5458 end = A->rmap->rend; 5459 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5460 } else { 5461 isrowa = *row; 5462 } 5463 if (!col) { 5464 start = A->cmap->rstart; 5465 cmap = a->garray; 5466 nzA = a->A->cmap->n; 5467 nzB = a->B->cmap->n; 5468 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5469 ncols = 0; 5470 for (i = 0; i < nzB; i++) { 5471 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5472 else break; 5473 } 5474 imark = i; 5475 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5476 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5477 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5478 } else { 5479 iscola = *col; 5480 } 5481 if (scall != MAT_INITIAL_MATRIX) { 5482 PetscCall(PetscMalloc1(1, &aloc)); 5483 aloc[0] = *A_loc; 5484 } 5485 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5486 if (!col) { /* attach global id of condensed columns */ 5487 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5488 } 5489 *A_loc = aloc[0]; 5490 PetscCall(PetscFree(aloc)); 5491 if (!row) PetscCall(ISDestroy(&isrowa)); 5492 if (!col) PetscCall(ISDestroy(&iscola)); 5493 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5494 PetscFunctionReturn(PETSC_SUCCESS); 5495 } 5496 5497 /* 5498 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5499 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5500 * on a global size. 
5501 * */ 5502 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5503 { 5504 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5505 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth; 5506 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5507 PetscMPIInt owner; 5508 PetscSFNode *iremote, *oiremote; 5509 const PetscInt *lrowindices; 5510 PetscSF sf, osf; 5511 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5512 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5513 MPI_Comm comm; 5514 ISLocalToGlobalMapping mapping; 5515 const PetscScalar *pd_a, *po_a; 5516 5517 PetscFunctionBegin; 5518 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5519 /* plocalsize is the number of roots 5520 * nrows is the number of leaves 5521 * */ 5522 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5523 PetscCall(ISGetLocalSize(rows, &nrows)); 5524 PetscCall(PetscCalloc1(nrows, &iremote)); 5525 PetscCall(ISGetIndices(rows, &lrowindices)); 5526 for (i = 0; i < nrows; i++) { 5527 /* Find a remote index and an owner for a row 5528 * The row could be local or remote 5529 * */ 5530 owner = 0; 5531 lidx = 0; 5532 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5533 iremote[i].index = lidx; 5534 iremote[i].rank = owner; 5535 } 5536 /* Create SF to communicate how many nonzero columns for each row */ 5537 PetscCall(PetscSFCreate(comm, &sf)); 5538 /* SF will figure out the number of nonzero colunms for each row, and their 5539 * offsets 5540 * */ 5541 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5542 PetscCall(PetscSFSetFromOptions(sf)); 5543 PetscCall(PetscSFSetUp(sf)); 5544 5545 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5546 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5547 PetscCall(PetscCalloc1(nrows, &pnnz)); 5548 roffsets[0] = 0; 5549 roffsets[1] = 0; 5550 for (i = 0; i < plocalsize; i++) { 5551 /* diag */ 5552 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5553 /* off diag */ 5554 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5555 /* compute offsets so that we relative location for each row */ 5556 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5557 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5558 } 5559 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5560 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5561 /* 'r' means root, and 'l' means leaf */ 5562 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5563 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5564 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5565 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5566 PetscCall(PetscSFDestroy(&sf)); 5567 PetscCall(PetscFree(roffsets)); 5568 PetscCall(PetscFree(nrcols)); 5569 dntotalcols = 0; 5570 ontotalcols = 0; 5571 ncol = 0; 5572 for (i = 0; i < nrows; i++) { 5573 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5574 ncol = PetscMax(pnnz[i], ncol); 5575 /* diag */ 5576 dntotalcols += nlcols[i * 2 + 0]; 5577 /* off diag */ 5578 ontotalcols += nlcols[i * 2 + 1]; 5579 } 5580 /* We do not need to figure the right number of columns 5581 * since all the calculations will be done by going through the raw data 5582 * */ 5583 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5584 PetscCall(MatSetUp(*P_oth)); 5585 PetscCall(PetscFree(pnnz)); 5586 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5587 /* diag */ 5588 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5589 /* off diag */ 5590 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5591 /* diag */ 5592 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5593 /* off diag */ 5594 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5595 dntotalcols = 0; 5596 ontotalcols = 0; 5597 ntotalcols = 0; 5598 for (i = 0; i < nrows; i++) { 5599 owner = 0; 5600 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5601 /* Set iremote for diag matrix */ 5602 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5603 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5604 iremote[dntotalcols].rank = owner; 5605 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5606 ilocal[dntotalcols++] = ntotalcols++; 5607 } 5608 /* off diag */ 5609 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5610 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5611 oiremote[ontotalcols].rank = owner; 5612 oilocal[ontotalcols++] = ntotalcols++; 5613 } 5614 } 5615 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5616 PetscCall(PetscFree(loffsets)); 5617 PetscCall(PetscFree(nlcols)); 5618 PetscCall(PetscSFCreate(comm, &sf)); 5619 /* P serves as roots and P_oth is leaves 5620 * Diag matrix 5621 * */ 5622 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5623 PetscCall(PetscSFSetFromOptions(sf)); 5624 PetscCall(PetscSFSetUp(sf)); 5625 5626 PetscCall(PetscSFCreate(comm, &osf)); 5627 /* Off diag */ 5628 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5629 PetscCall(PetscSFSetFromOptions(osf)); 5630 PetscCall(PetscSFSetUp(osf)); 5631 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5632 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5633 /* We operate on the matrix internal data for saving memory */ 5634 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5635 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5636 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5637 /* Convert to global indices for diag matrix */ 5638 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5639 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5640 /* We want P_oth store global indices */ 5641 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5642 /* Use memory scalable approach */ 5643 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5644 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5645 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5646 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5647 /* Convert back to local indices */ 5648 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5649 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5650 nout = 0; 5651 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5652 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5653 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5654 /* Exchange values */ 5655 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5656 
PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5657 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5658 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5659 /* Stop PETSc from shrinking memory */ 5660 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5661 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5662 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5663 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5664 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5665 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5666 PetscCall(PetscSFDestroy(&sf)); 5667 PetscCall(PetscSFDestroy(&osf)); 5668 PetscFunctionReturn(PETSC_SUCCESS); 5669 } 5670 5671 /* 5672 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5673 * This supports MPIAIJ and MAIJ 5674 * */ 5675 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5676 { 5677 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5678 Mat_SeqAIJ *p_oth; 5679 IS rows, map; 5680 PetscHMapI hamp; 5681 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5682 MPI_Comm comm; 5683 PetscSF sf, osf; 5684 PetscBool has; 5685 5686 PetscFunctionBegin; 5687 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5688 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5689 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5690 * and then create a submatrix (that often is an overlapping matrix) 5691 * */ 5692 if (reuse == MAT_INITIAL_MATRIX) { 5693 /* Use a hash table to figure out unique keys */ 5694 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5695 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5696 count = 0; 5697 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5698 for (i = 0; i < a->B->cmap->n; i++) { 5699 key = a->garray[i] / dof; 5700 PetscCall(PetscHMapIHas(hamp, key, &has)); 5701 if (!has) { 5702 mapping[i] = count; 5703 PetscCall(PetscHMapISet(hamp, key, count++)); 5704 } else { 5705 /* Current 'i' has the same value the previous step */ 5706 mapping[i] = count - 1; 5707 } 5708 } 5709 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5710 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5711 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5712 PetscCall(PetscCalloc1(htsize, &rowindices)); 5713 off = 0; 5714 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5715 PetscCall(PetscHMapIDestroy(&hamp)); 5716 PetscCall(PetscSortInt(htsize, rowindices)); 5717 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5718 /* In case, the matrix was already created but users want to recreate the matrix */ 5719 PetscCall(MatDestroy(P_oth)); 5720 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5721 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5722 PetscCall(ISDestroy(&map)); 5723 PetscCall(ISDestroy(&rows)); 5724 } else if (reuse == MAT_REUSE_MATRIX) { 5725 /* If matrix was already created, we simply update values using SF objects 5726 * that as attached to the matrix earlier. 
5727 */ 5728 const PetscScalar *pd_a, *po_a; 5729 5730 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5731 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5732 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5733 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5734 /* Update values in place */ 5735 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5736 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5737 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5738 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5739 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5740 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5741 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5742 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5743 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5744 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5745 PetscFunctionReturn(PETSC_SUCCESS); 5746 } 5747 5748 /*@C 5749 MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A 5750 5751 Collective 5752 5753 Input Parameters: 5754 + A - the first matrix in `MATMPIAIJ` format 5755 . B - the second matrix in `MATMPIAIJ` format 5756 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5757 5758 Output Parameters: 5759 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5760 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5761 - B_seq - the sequential matrix generated 5762 5763 Level: developer 5764 5765 @*/ 5766 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5767 { 5768 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5769 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5770 IS isrowb, iscolb; 5771 Mat *bseq = NULL; 5772 5773 PetscFunctionBegin; 5774 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5775 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5776 } 5777 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5778 5779 if (scall == MAT_INITIAL_MATRIX) { 5780 start = A->cmap->rstart; 5781 cmap = a->garray; 5782 nzA = a->A->cmap->n; 5783 nzB = a->B->cmap->n; 5784 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5785 ncols = 0; 5786 for (i = 0; i < nzB; i++) { /* row < local row index */ 5787 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5788 else break; 5789 } 5790 imark = i; 5791 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5792 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5793 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5794 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5795 } else { 5796 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5797 isrowb = *rowb; 5798 iscolb = *colb; 5799 PetscCall(PetscMalloc1(1, &bseq)); 5800 bseq[0] = *B_seq; 5801 } 5802 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5803 *B_seq = bseq[0]; 5804 PetscCall(PetscFree(bseq)); 5805 if (!rowb) { 5806 
PetscCall(ISDestroy(&isrowb)); 5807 } else { 5808 *rowb = isrowb; 5809 } 5810 if (!colb) { 5811 PetscCall(ISDestroy(&iscolb)); 5812 } else { 5813 *colb = iscolb; 5814 } 5815 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5816 PetscFunctionReturn(PETSC_SUCCESS); 5817 } 5818 5819 /* 5820 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5821 of the OFF-DIAGONAL portion of local A 5822 5823 Collective 5824 5825 Input Parameters: 5826 + A,B - the matrices in `MATMPIAIJ` format 5827 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5828 5829 Output Parameter: 5830 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5831 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5832 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5833 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5834 5835 Developer Note: 5836 This directly accesses information inside the VecScatter associated with the matrix-vector product 5837 for this matrix. This is not desirable.. 5838 5839 Level: developer 5840 5841 */ 5842 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5843 { 5844 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5845 Mat_SeqAIJ *b_oth; 5846 VecScatter ctx; 5847 MPI_Comm comm; 5848 const PetscMPIInt *rprocs, *sprocs; 5849 const PetscInt *srow, *rstarts, *sstarts; 5850 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5851 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5852 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5853 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5854 PetscMPIInt size, tag, rank, nreqs; 5855 5856 PetscFunctionBegin; 5857 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5858 PetscCallMPI(MPI_Comm_size(comm, &size)); 5859 5860 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5861 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5862 } 5863 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5864 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5865 5866 if (size == 1) { 5867 startsj_s = NULL; 5868 bufa_ptr = NULL; 5869 *B_oth = NULL; 5870 PetscFunctionReturn(PETSC_SUCCESS); 5871 } 5872 5873 ctx = a->Mvctx; 5874 tag = ((PetscObject)ctx)->tag; 5875 5876 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5877 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5878 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5879 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5880 PetscCall(PetscMalloc1(nreqs, &reqs)); 5881 rwaits = reqs; 5882 swaits = reqs + nrecvs; 5883 5884 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5885 if (scall == MAT_INITIAL_MATRIX) { 5886 /* i-array */ 5887 /* post receives */ 5888 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), 
&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5889 for (i = 0; i < nrecvs; i++) { 5890 rowlen = rvalues + rstarts[i] * rbs; 5891 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5892 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5893 } 5894 5895 /* pack the outgoing message */ 5896 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5897 5898 sstartsj[0] = 0; 5899 rstartsj[0] = 0; 5900 len = 0; /* total length of j or a array to be sent */ 5901 if (nsends) { 5902 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5903 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5904 } 5905 for (i = 0; i < nsends; i++) { 5906 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5907 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5908 for (j = 0; j < nrows; j++) { 5909 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5910 for (l = 0; l < sbs; l++) { 5911 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5912 5913 rowlen[j * sbs + l] = ncols; 5914 5915 len += ncols; 5916 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5917 } 5918 k++; 5919 } 5920 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5921 5922 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5923 } 5924 /* recvs and sends of i-array are completed */ 5925 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5926 PetscCall(PetscFree(svalues)); 5927 5928 /* allocate buffers for sending j and a arrays */ 5929 PetscCall(PetscMalloc1(len + 1, &bufj)); 5930 PetscCall(PetscMalloc1(len + 1, &bufa)); 5931 5932 /* create i-array of B_oth */ 5933 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5934 5935 b_othi[0] = 0; 5936 len = 0; /* total length of j or a array to be received */ 5937 k = 0; 5938 for (i = 0; i < nrecvs; i++) { 5939 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5940 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5941 for (j = 0; j < nrows; j++) { 5942 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5943 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5944 k++; 5945 } 5946 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5947 } 5948 PetscCall(PetscFree(rvalues)); 5949 5950 /* allocate space for j and a arrays of B_oth */ 5951 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5952 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5953 5954 /* j-array */ 5955 /* post receives of j-array */ 5956 for (i = 0; i < nrecvs; i++) { 5957 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5958 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5959 } 5960 5961 /* pack the outgoing message j-array */ 5962 if (nsends) k = sstarts[0]; 5963 for (i = 0; i < nsends; i++) { 5964 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5965 bufJ = bufj + sstartsj[i]; 5966 for (j = 0; j < nrows; j++) { 5967 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5968 for (ll = 0; ll < sbs; ll++) { 5969 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5970 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5971 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5972 } 5973 } 5974 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, 
swaits + i)); 5975 } 5976 5977 /* recvs and sends of j-array are completed */ 5978 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5979 } else if (scall == MAT_REUSE_MATRIX) { 5980 sstartsj = *startsj_s; 5981 rstartsj = *startsj_r; 5982 bufa = *bufa_ptr; 5983 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5984 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5985 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5986 5987 /* a-array */ 5988 /* post receives of a-array */ 5989 for (i = 0; i < nrecvs; i++) { 5990 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5991 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5992 } 5993 5994 /* pack the outgoing message a-array */ 5995 if (nsends) k = sstarts[0]; 5996 for (i = 0; i < nsends; i++) { 5997 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5998 bufA = bufa + sstartsj[i]; 5999 for (j = 0; j < nrows; j++) { 6000 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6001 for (ll = 0; ll < sbs; ll++) { 6002 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6003 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6004 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6005 } 6006 } 6007 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6008 } 6009 /* recvs and sends of a-array are completed */ 6010 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6011 PetscCall(PetscFree(reqs)); 6012 6013 if (scall == MAT_INITIAL_MATRIX) { 6014 /* put together the new matrix */ 6015 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6016 6017 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6018 /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 6019 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6020 b_oth->free_a = PETSC_TRUE; 6021 b_oth->free_ij = PETSC_TRUE; 6022 b_oth->nonew = 0; 6023 6024 PetscCall(PetscFree(bufj)); 6025 if (!startsj_s || !bufa_ptr) { 6026 PetscCall(PetscFree2(sstartsj, rstartsj)); 6027 PetscCall(PetscFree(bufa)); 6028 } else { 6029 *startsj_s = sstartsj; 6030 *startsj_r = rstartsj; 6031 *bufa_ptr = bufa; 6032 } 6033 } else if (scall == MAT_REUSE_MATRIX) { 6034 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6035 } 6036 6037 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6038 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6039 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6040 PetscFunctionReturn(PETSC_SUCCESS); 6041 } 6042 6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6046 #if defined(PETSC_HAVE_MKL_SPARSE) 6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6048 #endif 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6051 #if defined(PETSC_HAVE_ELEMENTAL) 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6053 #endif 6054 #if defined(PETSC_HAVE_SCALAPACK) 6055 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6056 #endif 6057 #if defined(PETSC_HAVE_HYPRE) 6058 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6059 #endif 6060 #if defined(PETSC_HAVE_CUDA) 6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6062 #endif 6063 #if defined(PETSC_HAVE_HIP) 6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6065 #endif 6066 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6068 #endif 6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6070 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6071 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6072 6073 /* 6074 Computes (B'*A')' since computing B*A directly is untenable 6075 6076 n p p 6077 [ ] [ ] [ ] 6078 m [ A ] * n [ B ] = m [ C ] 6079 [ ] [ ] [ ] 6080 6081 */ 6082 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6083 { 6084 Mat At, Bt, Ct; 6085 6086 PetscFunctionBegin; 6087 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6088 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6089 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6090 PetscCall(MatDestroy(&At)); 6091 PetscCall(MatDestroy(&Bt)); 6092 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6093 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6094 PetscCall(MatDestroy(&Ct)); 6095 PetscFunctionReturn(PETSC_SUCCESS); 6096 } 6097 6098 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6099 { 6100 PetscBool cisdense; 6101 6102 PetscFunctionBegin; 6103 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF,
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6104 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6105 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6106 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6107 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6108 PetscCall(MatSetUp(C)); 6109 6110 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6111 PetscFunctionReturn(PETSC_SUCCESS); 6112 } 6113 6114 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6115 { 6116 Mat_Product *product = C->product; 6117 Mat A = product->A, B = product->B; 6118 6119 PetscFunctionBegin; 6120 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6121 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6122 6123 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6124 C->ops->productsymbolic = MatProductSymbolic_AB; 6125 PetscFunctionReturn(PETSC_SUCCESS); 6126 } 6127 6128 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6129 { 6130 Mat_Product *product = C->product; 6131 6132 PetscFunctionBegin; 6133 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6134 PetscFunctionReturn(PETSC_SUCCESS); 6135 } 6136 6137 /* 6138 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6139 6140 Input Parameters: 6141 6142 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6143 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6144 6145 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6146 6147 For Set1, j1[] contains column indices of the nonzeros. 6148 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6149 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6150 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6151 6152 Similar for Set2. 6153 6154 This routine merges the two sets of nonzeros row by row and removes repeats. 6155 6156 Output Parameters: (memory is allocated by the caller) 6157 6158 i[],j[]: the CSR of the merged matrix, which has m rows. 6159 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix. 6160 imap2[]: similar to imap1[], but for Set2. 6161 Note we order nonzeros row-by-row and from left to right.
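
  Example (illustrative values, a single local row, i.e., m = 1):
    Set1: j1[] = [1,1,3,5], rowBegin1 = [0], rowEnd1 = [4], jmap1 = [0,2,3,4]  (unique columns 1,3,5; column 1 appears twice)
    Set2: j2[] = [3,3,4],   rowBegin2 = [0], rowEnd2 = [3], jmap2 = [0,2,3]    (unique columns 3,4; column 3 appears twice)
  give the merged CSR and maps
    i = [0,4], j = [1,3,4,5], imap1 = [0,1,3], imap2 = [1,2]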
6162 */ 6163 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6164 { 6165 PetscInt r, m; /* Row index of mat */ 6166 PetscCount t, t1, t2, b1, e1, b2, e2; 6167 6168 PetscFunctionBegin; 6169 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6170 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */ 6171 i[0] = 0; 6172 for (r = 0; r < m; r++) { /* Do row by row merging */ 6173 b1 = rowBegin1[r]; 6174 e1 = rowEnd1[r]; 6175 b2 = rowBegin2[r]; 6176 e2 = rowEnd2[r]; 6177 while (b1 < e1 && b2 < e2) { 6178 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6179 j[t] = j1[b1]; 6180 imap1[t1] = t; 6181 imap2[t2] = t; 6182 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6183 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6184 t1++; 6185 t2++; 6186 t++; 6187 } else if (j1[b1] < j2[b2]) { 6188 j[t] = j1[b1]; 6189 imap1[t1] = t; 6190 b1 += jmap1[t1 + 1] - jmap1[t1]; 6191 t1++; 6192 t++; 6193 } else { 6194 j[t] = j2[b2]; 6195 imap2[t2] = t; 6196 b2 += jmap2[t2 + 1] - jmap2[t2]; 6197 t2++; 6198 t++; 6199 } 6200 } 6201 /* Merge the remaining in either j1[] or j2[] */ 6202 while (b1 < e1) { 6203 j[t] = j1[b1]; 6204 imap1[t1] = t; 6205 b1 += jmap1[t1 + 1] - jmap1[t1]; 6206 t1++; 6207 t++; 6208 } 6209 while (b2 < e2) { 6210 j[t] = j2[b2]; 6211 imap2[t2] = t; 6212 b2 += jmap2[t2 + 1] - jmap2[t2]; 6213 t2++; 6214 t++; 6215 } 6216 i[r + 1] = t; 6217 } 6218 PetscFunctionReturn(PETSC_SUCCESS); 6219 } 6220 6221 /* 6222 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6223 6224 Input Parameters: 6225 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6226 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6227 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6228 6229 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6230 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6231 6232 Output Parameters: 6233 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6234 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6235 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6236 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6237 6238 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6239 Atot: number of entries belonging to the diagonal block. 6240 Annz: number of unique nonzeros belonging to the diagonal block. 6241 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6242 repeats (i.e., same 'i,j' pair). 6243 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t] 6244 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6245 6246 Atot: number of entries belonging to the diagonal block 6247 Annz: number of unique nonzeros belonging to the diagonal block. 6248 6249 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6250 6251 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6252 */ 6253 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6254 { 6255 PetscInt cstart, cend, rstart, rend, row, col; 6256 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6257 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6258 PetscCount k, m, p, q, r, s, mid; 6259 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6260 6261 PetscFunctionBegin; 6262 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6263 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6264 m = rend - rstart; 6265 6266 for (k = 0; k < n; k++) { 6267 if (i[k] >= 0) break; 6268 } /* Skip negative rows */ 6269 6270 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6271 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6272 */ 6273 while (k < n) { 6274 row = i[k]; 6275 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6276 for (s = k; s < n; s++) 6277 if (i[s] != row) break; 6278 for (p = k; p < s; p++) { 6279 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6280 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6281 } 6282 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6283 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6284 rowBegin[row - rstart] = k; 6285 rowMid[row - rstart] = mid; 6286 rowEnd[row - rstart] = s; 6287 6288 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6289 Atot += mid - k; 6290 Btot += s - mid; 6291 6292 /* Count unique nonzeros of this diag/offdiag row */ 6293 for (p = k; p < mid;) { 6294 col = j[p]; 6295 do { 6296 j[p] += PETSC_MAX_INT; 6297 p++; 6298 } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */ 6299 Annz++; 6300 } 6301 6302 for (p = mid; p < s;) { 6303 col = j[p]; 6304 do { 6305 p++; 6306 } while (p < s && j[p] == col); 6307 Bnnz++; 6308 } 6309 k = s; 6310 } 6311 6312 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6313 PetscCall(PetscMalloc1(Atot, &Aperm)); 6314 PetscCall(PetscMalloc1(Btot, &Bperm)); 6315 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6316 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6317 6318 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6319 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6320 for (r = 0; r < m; r++) { 6321 k = rowBegin[r]; 6322 mid = rowMid[r]; 6323 s = rowEnd[r]; 6324 
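/* Copy this row's diagonal/off-diagonal block entries of perm[] into Aperm[]/Bperm[] */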
PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6325 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6326 Atot += mid - k; 6327 Btot += s - mid; 6328 6329 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6330 for (p = k; p < mid;) { 6331 col = j[p]; 6332 q = p; 6333 do { 6334 p++; 6335 } while (p < mid && j[p] == col); 6336 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6337 Annz++; 6338 } 6339 6340 for (p = mid; p < s;) { 6341 col = j[p]; 6342 q = p; 6343 do { 6344 p++; 6345 } while (p < s && j[p] == col); 6346 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6347 Bnnz++; 6348 } 6349 } 6350 /* Output */ 6351 *Aperm_ = Aperm; 6352 *Annz_ = Annz; 6353 *Atot_ = Atot; 6354 *Ajmap_ = Ajmap; 6355 *Bperm_ = Bperm; 6356 *Bnnz_ = Bnnz; 6357 *Btot_ = Btot; 6358 *Bjmap_ = Bjmap; 6359 PetscFunctionReturn(PETSC_SUCCESS); 6360 } 6361 6362 /* 6363 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6364 6365 Input Parameters: 6366 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6367 nnz: number of unique nonzeros in the merged matrix 6368 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6369 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6370 6371 Output Parameter: (memory is allocated by the caller) 6372 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6373 6374 Example: 6375 nnz1 = 4 6376 nnz = 6 6377 imap = [1,3,4,5] 6378 jmap = [0,3,5,6,7] 6379 then, 6380 jmap_new = [0,0,3,3,5,6,7] 6381 */ 6382 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6383 { 6384 PetscCount k, p; 6385 6386 PetscFunctionBegin; 6387 jmap_new[0] = 0; 6388 p = nnz; /* p loops over jmap_new[] backwards */ 6389 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6390 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6391 } 6392 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6393 PetscFunctionReturn(PETSC_SUCCESS); 6394 } 6395 6396 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6397 { 6398 MPI_Comm comm; 6399 PetscMPIInt rank, size; 6400 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6401 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6402 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6403 6404 PetscFunctionBegin; 6405 PetscCall(PetscFree(mpiaij->garray)); 6406 PetscCall(VecDestroy(&mpiaij->lvec)); 6407 #if defined(PETSC_USE_CTABLE) 6408 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6409 #else 6410 PetscCall(PetscFree(mpiaij->colmap)); 6411 #endif 6412 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6413 mat->assembled = PETSC_FALSE; 6414 mat->was_assembled = PETSC_FALSE; 6415 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6416 6417 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6418 PetscCallMPI(MPI_Comm_size(comm, &size)); 6419 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6420 PetscCall(PetscLayoutSetUp(mat->rmap)); 6421 PetscCall(PetscLayoutSetUp(mat->cmap)); 6422 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6423 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6424 PetscCall(MatGetLocalSize(mat, &m, &n)); 6425 PetscCall(MatGetSize(mat, &M, &N)); 6426 6427 /* Sort (i,j) by row along with a permutation array, so that the 
to-be-ignored */ 6428 /* entries come first, then local rows, then remote rows. */ 6429 PetscCount n1 = coo_n, *perm1; 6430 PetscInt *i1 = coo_i, *j1 = coo_j; 6431 6432 PetscCall(PetscMalloc1(n1, &perm1)); 6433 for (k = 0; k < n1; k++) perm1[k] = k; 6434 6435 /* Manipulate indices so that entries with negative row or col indices will have smallest 6436 row indices, local entries will have greater but negative row indices, and remote entries 6437 will have positive row indices. 6438 */ 6439 for (k = 0; k < n1; k++) { 6440 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6441 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6442 else { 6443 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6444 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6445 } 6446 } 6447 6448 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6449 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6450 for (k = 0; k < n1; k++) { 6451 if (i1[k] > PETSC_MIN_INT) break; 6452 } /* Advance k to the first entry we need to take care of */ 6453 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6454 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6455 6456 /* Split local rows into diag/offdiag portions */ 6457 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6458 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6459 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6460 6461 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6462 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6463 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6464 6465 /* Send remote rows to their owner */ 6466 /* Find which rows should be sent to which remote ranks*/ 6467 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6468 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6469 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6470 const PetscInt *ranges; 6471 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6472 6473 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6474 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6475 for (k = rem; k < n1;) { 6476 PetscMPIInt owner; 6477 PetscInt firstRow, lastRow; 6478 6479 /* Locate a row range */ 6480 firstRow = i1[k]; /* first row of this owner */ 6481 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6482 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6483 6484 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6485 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6486 6487 /* All entries in [k,p) belong to this remote owner */ 6488 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6489 PetscMPIInt *sendto2; 6490 PetscInt *nentries2; 6491 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6492 6493 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6494 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6495 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6496 PetscCall(PetscFree2(sendto, nentries)); 6497 sendto = sendto2; 6498 nentries = nentries2; 6499 maxNsend = maxNsend2; 6500 } 6501 sendto[nsend] = owner; 6502 nentries[nsend] = p - k; 6503 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6504 nsend++; 6505 k = p; 6506 } 6507 6508 /* Build 1st SF to know offsets on remote to send data */ 6509 PetscSF sf1; 6510 PetscInt nroots = 1, nroots2 = 0; 6511 PetscInt nleaves = nsend, nleaves2 = 0; 6512 PetscInt *offsets; 6513 PetscSFNode *iremote; 6514 6515 PetscCall(PetscSFCreate(comm, &sf1)); 6516 PetscCall(PetscMalloc1(nsend, &iremote)); 6517 PetscCall(PetscMalloc1(nsend, &offsets)); 6518 for (k = 0; k < nsend; k++) { 6519 iremote[k].rank = sendto[k]; 6520 iremote[k].index = 0; 6521 nleaves2 += nentries[k]; 6522 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6523 } 6524 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6525 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6526 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* If nroots2 overflowed, the offsets[] check below would catch it */ 6527 PetscCall(PetscSFDestroy(&sf1)); 6528 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6529 6530 /* Build 2nd SF to send remote COOs to their owner */ 6531 PetscSF sf2; 6532 nroots = nroots2; 6533 nleaves = nleaves2; 6534 PetscCall(PetscSFCreate(comm, &sf2)); 6535 PetscCall(PetscSFSetFromOptions(sf2)); 6536 PetscCall(PetscMalloc1(nleaves, &iremote)); 6537 p = 0; 6538 for (k = 0; k < nsend; k++) { 6539 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6540 for (q = 0; q < nentries[k]; q++, p++) { 6541 iremote[p].rank = sendto[k]; 6542 iremote[p].index = offsets[k] + q; 6543 } 6544 } 6545 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6546 6547 /* sf2 only sends contiguous leafdata to contiguous rootdata.
We record the permutation which will be used to fill leafdata */ 6548 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6549 6550 /* Send the remote COOs to their owner */ 6551 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6552 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6553 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6554 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6555 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6556 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6557 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6558 6559 PetscCall(PetscFree(offsets)); 6560 PetscCall(PetscFree2(sendto, nentries)); 6561 6562 /* Sort received COOs by row along with the permutation array */ 6563 for (k = 0; k < n2; k++) perm2[k] = k; 6564 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6565 6566 /* Split received COOs into diag/offdiag portions */ 6567 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6568 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6569 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6570 6571 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6572 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6573 6574 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6575 PetscInt *Ai, *Bi; 6576 PetscInt *Aj, *Bj; 6577 6578 PetscCall(PetscMalloc1(m + 1, &Ai)); 6579 PetscCall(PetscMalloc1(m + 1, &Bi)); 6580 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6581 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6582 6583 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6584 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6585 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6586 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6587 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6588 6589 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6590 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6591 6592 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6593 /* expect nonzeros in A/B most likely have local contributing entries */ 6594 PetscInt Annz = Ai[m]; 6595 PetscInt Bnnz = Bi[m]; 6596 PetscCount *Ajmap1_new, *Bjmap1_new; 6597 6598 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6599 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6600 6601 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6602 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6603 6604 PetscCall(PetscFree(Aimap1)); 6605 PetscCall(PetscFree(Ajmap1)); 6606 PetscCall(PetscFree(Bimap1)); 6607 PetscCall(PetscFree(Bjmap1)); 6608 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6609 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6610 PetscCall(PetscFree(perm1)); 6611 PetscCall(PetscFree3(i2, j2, perm2)); 6612 6613 Ajmap1 = Ajmap1_new; 6614 Bjmap1 = Bjmap1_new; 6615 6616 /* Reallocate Aj, Bj once we know actual numbers of 
unique nonzeros in A and B */ 6617 if (Annz < Annz1 + Annz2) { 6618 PetscInt *Aj_new; 6619 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6620 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6621 PetscCall(PetscFree(Aj)); 6622 Aj = Aj_new; 6623 } 6624 6625 if (Bnnz < Bnnz1 + Bnnz2) { 6626 PetscInt *Bj_new; 6627 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6628 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6629 PetscCall(PetscFree(Bj)); 6630 Bj = Bj_new; 6631 } 6632 6633 /* Create new submatrices for on-process and off-process coupling */ 6634 PetscScalar *Aa, *Ba; 6635 MatType rtype; 6636 Mat_SeqAIJ *a, *b; 6637 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6638 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6639 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6640 if (cstart) { 6641 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6642 } 6643 PetscCall(MatDestroy(&mpiaij->A)); 6644 PetscCall(MatDestroy(&mpiaij->B)); 6645 PetscCall(MatGetRootType_Private(mat, &rtype)); 6646 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6647 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6648 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6649 6650 a = (Mat_SeqAIJ *)mpiaij->A->data; 6651 b = (Mat_SeqAIJ *)mpiaij->B->data; 6652 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6653 a->free_a = b->free_a = PETSC_TRUE; 6654 a->free_ij = b->free_ij = PETSC_TRUE; 6655 6656 /* conversion must happen AFTER multiply setup */ 6657 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6658 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6659 PetscCall(VecDestroy(&mpiaij->lvec)); 6660 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6661 6662 mpiaij->coo_n = coo_n; 6663 mpiaij->coo_sf = sf2; 6664 mpiaij->sendlen = nleaves; 6665 mpiaij->recvlen = nroots; 6666 6667 mpiaij->Annz = Annz; 6668 mpiaij->Bnnz = Bnnz; 6669 6670 mpiaij->Annz2 = Annz2; 6671 mpiaij->Bnnz2 = Bnnz2; 6672 6673 mpiaij->Atot1 = Atot1; 6674 mpiaij->Atot2 = Atot2; 6675 mpiaij->Btot1 = Btot1; 6676 mpiaij->Btot2 = Btot2; 6677 6678 mpiaij->Ajmap1 = Ajmap1; 6679 mpiaij->Aperm1 = Aperm1; 6680 6681 mpiaij->Bjmap1 = Bjmap1; 6682 mpiaij->Bperm1 = Bperm1; 6683 6684 mpiaij->Aimap2 = Aimap2; 6685 mpiaij->Ajmap2 = Ajmap2; 6686 mpiaij->Aperm2 = Aperm2; 6687 6688 mpiaij->Bimap2 = Bimap2; 6689 mpiaij->Bjmap2 = Bjmap2; 6690 mpiaij->Bperm2 = Bperm2; 6691 6692 mpiaij->Cperm1 = Cperm1; 6693 6694 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6695 PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf)); 6696 PetscFunctionReturn(PETSC_SUCCESS); 6697 } 6698 6699 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6700 { 6701 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6702 Mat A = mpiaij->A, B = mpiaij->B; 6703 PetscCount Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2; 6704 PetscScalar *Aa, *Ba; 6705 PetscScalar *sendbuf = mpiaij->sendbuf; 6706 PetscScalar *recvbuf = mpiaij->recvbuf; 6707 const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2; 6708 const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2; 6709 const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2; 6710 const PetscCount *Cperm1 = mpiaij->Cperm1; 6711 6712 PetscFunctionBegin; 6713 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6714 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6715 6716 /* Pack entries to be sent to remote */ 6717 for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6718 6719 /* Send remote entries to their owner and overlap the communication with local computation */ 6720 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6721 /* Add local entries to A and B */ 6722 for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6723 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6724 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6725 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6726 } 6727 for (PetscCount i = 0; i < Bnnz; i++) { 6728 PetscScalar sum = 0.0; 6729 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6730 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6731 } 6732 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6733 6734 /* Add received remote entries to A and B */ 6735 for (PetscCount i = 0; i < Annz2; i++) { 6736 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6737 } 6738 for (PetscCount i = 0; i < Bnnz2; i++) { 6739 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6740 } 6741 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6742 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6743 PetscFunctionReturn(PETSC_SUCCESS); 6744 } 6745 6746 /*MC 6747 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6748 6749 Options Database Keys: 6750 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6751 6752 Level: beginner 6753 6754 Notes: 6755 `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values, 6756 in this case the values associated with the rows and columns one passes in are set to zero 6757 in the matrix 6758 6759 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 6760 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6761 6762 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6763 M*/ 6764 6765 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6766 { 6767 Mat_MPIAIJ *b; 6768 PetscMPIInt size; 6769 6770 PetscFunctionBegin; 6771 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6772 6773 PetscCall(PetscNew(&b)); 6774 B->data = (void *)b; 6775 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 6776 B->assembled = PETSC_FALSE; 6777 B->insertmode = NOT_SET_VALUES; 6778 b->size = size; 6779 6780 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6781 6782 /* build cache for off array entries formed */ 6783 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6784 6785 b->donotstash = PETSC_FALSE; 6786 b->colmap = NULL; 6787 b->garray = NULL; 6788 b->roworiented = PETSC_TRUE; 6789 6790 /* stuff used for matrix vector multiply */ 6791 b->lvec = NULL; 6792 b->Mvctx = NULL; 6793 6794 /* stuff for MatGetRow() */ 6795 b->rowindices = NULL; 6796 b->rowvalues = NULL; 6797 b->getrowactive = PETSC_FALSE; 6798 6799 /* flexible pointer used in CUSPARSE classes */ 6800 b->spptr = NULL; 6801 6802 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6803 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6804 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6805 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6806 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6807 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6808 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6809 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6810 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6811 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6812 #if defined(PETSC_HAVE_CUDA) 6813 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6814 #endif 6815 #if defined(PETSC_HAVE_HIP) 6816 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6817 #endif 6818 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6819 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6820 #endif 6821 #if defined(PETSC_HAVE_MKL_SPARSE) 6822 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6823 #endif 6824 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6825 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6826 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", 
MatConvert_MPIAIJ_MPISBAIJ)); 6827 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6828 #if defined(PETSC_HAVE_ELEMENTAL) 6829 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6830 #endif 6831 #if defined(PETSC_HAVE_SCALAPACK) 6832 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6833 #endif 6834 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6835 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6836 #if defined(PETSC_HAVE_HYPRE) 6837 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6838 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6839 #endif 6840 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6842 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6844 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6845 PetscFunctionReturn(PETSC_SUCCESS); 6846 } 6847 6848 /*@C 6849 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6850 and "off-diagonal" part of the matrix in CSR format. 6851 6852 Collective 6853 6854 Input Parameters: 6855 + comm - MPI communicator 6856 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6857 . n - This value should be the same as the local size used in creating the 6858 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6859 calculated if N is given) For square matrices n is almost always m. 6860 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 6861 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 6862 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6863 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6864 . a - matrix values 6865 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6866 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6867 - oa - matrix values 6868 6869 Output Parameter: 6870 . mat - the matrix 6871 6872 Level: advanced 6873 6874 Notes: 6875 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6876 must free the arrays once the matrix has been destroyed and not before. 6877 6878 The i and j indices are 0 based 6879 6880 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6881 6882 This sets local rows and cannot be used to set off-processor values. 6883 6884 Use of this routine is discouraged because it is inflexible and cumbersome to use. 
It is extremely rare that a 6885 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6886 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6887 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6888 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6889 communication if it is known that only local entries will be set. 6890 6891 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6892 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6893 @*/ 6894 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6895 { 6896 Mat_MPIAIJ *maij; 6897 6898 PetscFunctionBegin; 6899 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6900 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6901 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6902 PetscCall(MatCreate(comm, mat)); 6903 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6904 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6905 maij = (Mat_MPIAIJ *)(*mat)->data; 6906 6907 (*mat)->preallocated = PETSC_TRUE; 6908 6909 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6910 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6911 6912 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6913 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6914 6915 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6916 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6917 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6918 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6919 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6920 PetscFunctionReturn(PETSC_SUCCESS); 6921 } 6922 6923 typedef struct { 6924 Mat *mp; /* intermediate products */ 6925 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6926 PetscInt cp; /* number of intermediate products */ 6927 6928 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6929 PetscInt *startsj_s, *startsj_r; 6930 PetscScalar *bufa; 6931 Mat P_oth; 6932 6933 /* may take advantage of merging product->B */ 6934 Mat Bloc; /* B-local by merging diag and off-diag */ 6935 6936 /* cusparse does not have support to split between symbolic and numeric phases. 6937 When api_user is true, we don't need to update the numerical values 6938 of the temporary storage */ 6939 PetscBool reusesym; 6940 6941 /* support for COO values insertion */ 6942 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6943 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6944 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6945 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6946 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6947 PetscMemType mtype; 6948 6949 /* customization */ 6950 PetscBool abmerge; 6951 PetscBool P_oth_bind; 6952 } MatMatMPIAIJBACKEND; 6953 6954 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6955 { 6956 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6957 PetscInt i; 6958 6959 PetscFunctionBegin; 6960 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6961 PetscCall(PetscFree(mmdata->bufa)); 6962 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6963 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6964 PetscCall(MatDestroy(&mmdata->P_oth)); 6965 PetscCall(MatDestroy(&mmdata->Bloc)); 6966 PetscCall(PetscSFDestroy(&mmdata->sf)); 6967 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 6968 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 6969 PetscCall(PetscFree(mmdata->own[0])); 6970 PetscCall(PetscFree(mmdata->own)); 6971 PetscCall(PetscFree(mmdata->off[0])); 6972 PetscCall(PetscFree(mmdata->off)); 6973 PetscCall(PetscFree(mmdata)); 6974 PetscFunctionReturn(PETSC_SUCCESS); 6975 } 6976 6977 /* Copy selected n entries with indices in idx[] of A to v[]. 6978 If idx is NULL, copy the whole data array of A to v[] 6979 */ 6980 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6981 { 6982 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 6983 6984 PetscFunctionBegin; 6985 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 6986 if (f) { 6987 PetscCall((*f)(A, n, idx, v)); 6988 } else { 6989 const PetscScalar *vv; 6990 6991 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 6992 if (n && idx) { 6993 PetscScalar *w = v; 6994 const PetscInt *oi = idx; 6995 PetscInt j; 6996 6997 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6998 } else { 6999 PetscCall(PetscArraycpy(v, vv, n)); 7000 } 7001 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7002 } 7003 PetscFunctionReturn(PETSC_SUCCESS); 7004 } 7005 7006 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7007 { 7008 MatMatMPIAIJBACKEND *mmdata; 7009 PetscInt i, n_d, n_o; 7010 7011 PetscFunctionBegin; 7012 MatCheckProduct(C, 1); 7013 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7014 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7015 if (!mmdata->reusesym) { /* update temporary matrices */ 7016 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7017 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7018 } 7019 mmdata->reusesym = PETSC_FALSE; 7020 7021 for (i = 0; i < mmdata->cp; i++) { 7022 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7023 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7024 } 7025 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7026 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7027 7028 if (mmdata->mptmp[i]) continue; 7029 if (noff) { 7030 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7031 7032 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7033 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, 
mmdata->own[i], mmdata->coo_v + n_d)); 7034 n_o += noff; 7035 n_d += nown; 7036 } else { 7037 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7038 7039 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7040 n_d += mm->nz; 7041 } 7042 } 7043 if (mmdata->hasoffproc) { /* offprocess insertion */ 7044 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7045 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7046 } 7047 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7048 PetscFunctionReturn(PETSC_SUCCESS); 7049 } 7050 7051 /* Support for Pt * A, A * P, or Pt * A * P */ 7052 #define MAX_NUMBER_INTERMEDIATE 4 7053 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7054 { 7055 Mat_Product *product = C->product; 7056 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7057 Mat_MPIAIJ *a, *p; 7058 MatMatMPIAIJBACKEND *mmdata; 7059 ISLocalToGlobalMapping P_oth_l2g = NULL; 7060 IS glob = NULL; 7061 const char *prefix; 7062 char pprefix[256]; 7063 const PetscInt *globidx, *P_oth_idx; 7064 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7065 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7066 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7067 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7068 /* a base offset; type-2: sparse with a local to global map table */ 7069 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7070 7071 MatProductType ptype; 7072 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7073 PetscMPIInt size; 7074 7075 PetscFunctionBegin; 7076 MatCheckProduct(C, 1); 7077 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7078 ptype = product->type; 7079 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7080 ptype = MATPRODUCT_AB; 7081 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7082 } 7083 switch (ptype) { 7084 case MATPRODUCT_AB: 7085 A = product->A; 7086 P = product->B; 7087 m = A->rmap->n; 7088 n = P->cmap->n; 7089 M = A->rmap->N; 7090 N = P->cmap->N; 7091 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7092 break; 7093 case MATPRODUCT_AtB: 7094 P = product->A; 7095 A = product->B; 7096 m = P->cmap->n; 7097 n = A->cmap->n; 7098 M = P->cmap->N; 7099 N = A->cmap->N; 7100 hasoffproc = PETSC_TRUE; 7101 break; 7102 case MATPRODUCT_PtAP: 7103 A = product->A; 7104 P = product->B; 7105 m = P->cmap->n; 7106 n = P->cmap->n; 7107 M = P->cmap->N; 7108 N = P->cmap->N; 7109 hasoffproc = PETSC_TRUE; 7110 break; 7111 default: 7112 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7113 } 7114 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7115 if (size == 1) hasoffproc = PETSC_FALSE; 7116 7117 /* defaults */ 7118 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7119 mp[i] = NULL; 7120 mptmp[i] = PETSC_FALSE; 7121 rmapt[i] = -1; 7122 cmapt[i] = -1; 7123 rmapa[i] = NULL; 7124 cmapa[i] = NULL; 7125 } 7126 7127 /* customization */ 7128 PetscCall(PetscNew(&mmdata)); 7129 mmdata->reusesym = product->api_user; 7130 if (ptype == MATPRODUCT_AB) { 7131 if (product->api_user) { 7132 
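/* user-level API call (e.g., MatMatMult()): backend options are exposed with the -matmatmult_ prefix */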
PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7133 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7134 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7135 PetscOptionsEnd(); 7136 } else { 7137 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7138 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7139 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7140 PetscOptionsEnd(); 7141 } 7142 } else if (ptype == MATPRODUCT_PtAP) { 7143 if (product->api_user) { 7144 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7145 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7146 PetscOptionsEnd(); 7147 } else { 7148 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7149 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7150 PetscOptionsEnd(); 7151 } 7152 } 7153 a = (Mat_MPIAIJ *)A->data; 7154 p = (Mat_MPIAIJ *)P->data; 7155 PetscCall(MatSetSizes(C, m, n, M, N)); 7156 PetscCall(PetscLayoutSetUp(C->rmap)); 7157 PetscCall(PetscLayoutSetUp(C->cmap)); 7158 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7159 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7160 7161 cp = 0; 7162 switch (ptype) { 7163 case MATPRODUCT_AB: /* A * P */ 7164 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7165 7166 /* A_diag * P_local (merged or not) */ 7167 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7168 /* P is product->B */ 7169 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7170 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7171 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7172 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7173 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7174 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7175 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7176 mp[cp]->product->api_user = product->api_user; 7177 PetscCall(MatProductSetFromOptions(mp[cp])); 7178 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7179 PetscCall(ISGetIndices(glob, &globidx)); 7180 rmapt[cp] = 1; 7181 cmapt[cp] = 2; 7182 cmapa[cp] = globidx; 7183 mptmp[cp] = PETSC_FALSE; 7184 cp++; 7185 } else { /* A_diag * P_diag and A_diag * P_off */ 7186 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7187 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7188 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7189 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7190 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7191 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7192 mp[cp]->product->api_user = 
product->api_user; 7193 PetscCall(MatProductSetFromOptions(mp[cp])); 7194 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7195 rmapt[cp] = 1; 7196 cmapt[cp] = 1; 7197 mptmp[cp] = PETSC_FALSE; 7198 cp++; 7199 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7200 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7201 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7202 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7203 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7204 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7205 mp[cp]->product->api_user = product->api_user; 7206 PetscCall(MatProductSetFromOptions(mp[cp])); 7207 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7208 rmapt[cp] = 1; 7209 cmapt[cp] = 2; 7210 cmapa[cp] = p->garray; 7211 mptmp[cp] = PETSC_FALSE; 7212 cp++; 7213 } 7214 7215 /* A_off * P_other */ 7216 if (mmdata->P_oth) { 7217 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7218 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7219 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7220 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7221 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7222 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7223 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7224 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7225 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7226 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7227 mp[cp]->product->api_user = product->api_user; 7228 PetscCall(MatProductSetFromOptions(mp[cp])); 7229 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7230 rmapt[cp] = 1; 7231 cmapt[cp] = 2; 7232 cmapa[cp] = P_oth_idx; 7233 mptmp[cp] = PETSC_FALSE; 7234 cp++; 7235 } 7236 break; 7237 7238 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7239 /* A is product->B */ 7240 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7241 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7242 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7243 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7244 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7245 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7246 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7247 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7248 mp[cp]->product->api_user = product->api_user; 7249 PetscCall(MatProductSetFromOptions(mp[cp])); 7250 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7251 PetscCall(ISGetIndices(glob, &globidx)); 7252 rmapt[cp] = 2; 7253 rmapa[cp] = globidx; 7254 cmapt[cp] = 2; 7255 cmapa[cp] = globidx; 7256 mptmp[cp] = PETSC_FALSE; 7257 cp++; 7258 } else { 7259 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7260 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7261 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7262 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7263 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7264 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7265 mp[cp]->product->api_user = product->api_user; 7266 PetscCall(MatProductSetFromOptions(mp[cp])); 7267 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7268 PetscCall(ISGetIndices(glob, &globidx)); 
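      /* Note (added for clarity): for this (p->A)^T * Bloc product, rows of the result are P's locally owned
         columns, i.e. C's owned rows (consecutive with C's row-start offset, hence row map type 1 below), while
         its columns live in the merged local matrix of A and are mapped back to global indices through glob
         (hence column map type 2). */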
7269 rmapt[cp] = 1; 7270 cmapt[cp] = 2; 7271 cmapa[cp] = globidx; 7272 mptmp[cp] = PETSC_FALSE; 7273 cp++; 7274 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7275 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7276 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7277 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7278 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7279 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7280 mp[cp]->product->api_user = product->api_user; 7281 PetscCall(MatProductSetFromOptions(mp[cp])); 7282 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7283 rmapt[cp] = 2; 7284 rmapa[cp] = p->garray; 7285 cmapt[cp] = 2; 7286 cmapa[cp] = globidx; 7287 mptmp[cp] = PETSC_FALSE; 7288 cp++; 7289 } 7290 break; 7291 case MATPRODUCT_PtAP: 7292 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7293 /* P is product->B */ 7294 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7295 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7296 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7297 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7298 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7299 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7300 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7301 mp[cp]->product->api_user = product->api_user; 7302 PetscCall(MatProductSetFromOptions(mp[cp])); 7303 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7304 PetscCall(ISGetIndices(glob, &globidx)); 7305 rmapt[cp] = 2; 7306 rmapa[cp] = globidx; 7307 cmapt[cp] = 2; 7308 cmapa[cp] = globidx; 7309 mptmp[cp] = PETSC_FALSE; 7310 cp++; 7311 if (mmdata->P_oth) { 7312 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7313 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7314 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7315 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7316 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7317 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7318 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7319 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7320 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7321 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7322 mp[cp]->product->api_user = product->api_user; 7323 PetscCall(MatProductSetFromOptions(mp[cp])); 7324 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7325 mptmp[cp] = PETSC_TRUE; 7326 cp++; 7327 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7328 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7329 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7330 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7331 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7332 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7333 mp[cp]->product->api_user = product->api_user; 7334 PetscCall(MatProductSetFromOptions(mp[cp])); 7335 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7336 rmapt[cp] = 2; 7337 rmapa[cp] = globidx; 7338 cmapt[cp] = 2; 7339 cmapa[cp] = P_oth_idx; 7340 mptmp[cp] = PETSC_FALSE; 7341 cp++; 7342 } 7343 break; 7344 default: 7345 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", 
MatProductTypes[ptype]);
7346 }
7347 /* sanity check */
7348 if (size > 1)
7349 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7350
7351 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7352 for (i = 0; i < cp; i++) {
7353 mmdata->mp[i] = mp[i];
7354 mmdata->mptmp[i] = mptmp[i];
7355 }
7356 mmdata->cp = cp;
7357 C->product->data = mmdata;
7358 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND;
7359 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7360
7361 /* memory type */
7362 mmdata->mtype = PETSC_MEMTYPE_HOST;
7363 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7364 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7365 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7366 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7367 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7368 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7369
7370 /* prepare coo coordinates for values insertion */
7371
7372 /* count total nonzeros of those intermediate seqaij Mats
7373 ncoo_d: # of nonzeros of matrices that do not have offproc entries
7374 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7375 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7376 */
7377 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7378 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7379 if (mptmp[cp]) continue;
7380 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7381 const PetscInt *rmap = rmapa[cp];
7382 const PetscInt mr = mp[cp]->rmap->n;
7383 const PetscInt rs = C->rmap->rstart;
7384 const PetscInt re = C->rmap->rend;
7385 const PetscInt *ii = mm->i;
7386 for (i = 0; i < mr; i++) {
7387 const PetscInt gr = rmap[i];
7388 const PetscInt nz = ii[i + 1] - ii[i];
7389 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7390 else ncoo_oown += nz; /* this row is local */
7391 }
7392 } else ncoo_d += mm->nz;
7393 }
7394
7395 /*
7396 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7397
7398 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7399
7400 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7401
7402 off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert to others
7403 own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
7404 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7405
7406 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7407 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros this process will receive.
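     Illustrative example (added; not from the original source): with cp = 2 non-temporary products, off[] and own[]
     each carve one shared index array into CSR-style segments, i.e. off[0]..off[1] holds the locations (indices into
     mp[0]'s value array) of mp[0]'s off-process nonzeros and off[1]..off[2] those of mp[1]; coo_v[] is then laid out
     as [ncoo_d + ncoo_oown locally computed values | ncoo2 values received from other ranks].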
7408 */ 7409 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7410 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7411 7412 /* gather (i,j) of nonzeros inserted by remote procs */ 7413 if (hasoffproc) { 7414 PetscSF msf; 7415 PetscInt ncoo2, *coo_i2, *coo_j2; 7416 7417 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7418 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7419 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7420 7421 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7422 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7423 PetscInt *idxoff = mmdata->off[cp]; 7424 PetscInt *idxown = mmdata->own[cp]; 7425 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7426 const PetscInt *rmap = rmapa[cp]; 7427 const PetscInt *cmap = cmapa[cp]; 7428 const PetscInt *ii = mm->i; 7429 PetscInt *coi = coo_i + ncoo_o; 7430 PetscInt *coj = coo_j + ncoo_o; 7431 const PetscInt mr = mp[cp]->rmap->n; 7432 const PetscInt rs = C->rmap->rstart; 7433 const PetscInt re = C->rmap->rend; 7434 const PetscInt cs = C->cmap->rstart; 7435 for (i = 0; i < mr; i++) { 7436 const PetscInt *jj = mm->j + ii[i]; 7437 const PetscInt gr = rmap[i]; 7438 const PetscInt nz = ii[i + 1] - ii[i]; 7439 if (gr < rs || gr >= re) { /* this is an offproc row */ 7440 for (j = ii[i]; j < ii[i + 1]; j++) { 7441 *coi++ = gr; 7442 *idxoff++ = j; 7443 } 7444 if (!cmapt[cp]) { /* already global */ 7445 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7446 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7447 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7448 } else { /* offdiag */ 7449 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7450 } 7451 ncoo_o += nz; 7452 } else { /* this is a local row */ 7453 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7454 } 7455 } 7456 } 7457 mmdata->off[cp + 1] = idxoff; 7458 mmdata->own[cp + 1] = idxown; 7459 } 7460 7461 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7462 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7463 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7464 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7465 ncoo = ncoo_d + ncoo_oown + ncoo2; 7466 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7467 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7468 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7469 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7470 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7471 PetscCall(PetscFree2(coo_i, coo_j)); 7472 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7473 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7474 coo_i = coo_i2; 7475 coo_j = coo_j2; 7476 } else { /* no offproc values insertion */ 7477 ncoo = ncoo_d; 7478 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7479 7480 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7481 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7482 PetscCall(PetscSFSetUp(mmdata->sf)); 7483 } 7484 mmdata->hasoffproc = hasoffproc; 7485 7486 /* gather (i,j) of nonzeros 
inserted locally */ 7487 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7488 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7489 PetscInt *coi = coo_i + ncoo_d; 7490 PetscInt *coj = coo_j + ncoo_d; 7491 const PetscInt *jj = mm->j; 7492 const PetscInt *ii = mm->i; 7493 const PetscInt *cmap = cmapa[cp]; 7494 const PetscInt *rmap = rmapa[cp]; 7495 const PetscInt mr = mp[cp]->rmap->n; 7496 const PetscInt rs = C->rmap->rstart; 7497 const PetscInt re = C->rmap->rend; 7498 const PetscInt cs = C->cmap->rstart; 7499 7500 if (mptmp[cp]) continue; 7501 if (rmapt[cp] == 1) { /* consecutive rows */ 7502 /* fill coo_i */ 7503 for (i = 0; i < mr; i++) { 7504 const PetscInt gr = i + rs; 7505 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7506 } 7507 /* fill coo_j */ 7508 if (!cmapt[cp]) { /* type-0, already global */ 7509 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7510 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7511 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7512 } else { /* type-2, local to global for sparse columns */ 7513 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7514 } 7515 ncoo_d += mm->nz; 7516 } else if (rmapt[cp] == 2) { /* sparse rows */ 7517 for (i = 0; i < mr; i++) { 7518 const PetscInt *jj = mm->j + ii[i]; 7519 const PetscInt gr = rmap[i]; 7520 const PetscInt nz = ii[i + 1] - ii[i]; 7521 if (gr >= rs && gr < re) { /* local rows */ 7522 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7523 if (!cmapt[cp]) { /* type-0, already global */ 7524 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7525 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7526 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7527 } else { /* type-2, local to global for sparse columns */ 7528 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7529 } 7530 ncoo_d += nz; 7531 } 7532 } 7533 } 7534 } 7535 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7536 PetscCall(ISDestroy(&glob)); 7537 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7538 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7539 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7540 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7541 7542 /* preallocate with COO data */ 7543 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7544 PetscCall(PetscFree2(coo_i, coo_j)); 7545 PetscFunctionReturn(PETSC_SUCCESS); 7546 } 7547 7548 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7549 { 7550 Mat_Product *product = mat->product; 7551 #if defined(PETSC_HAVE_DEVICE) 7552 PetscBool match = PETSC_FALSE; 7553 PetscBool usecpu = PETSC_FALSE; 7554 #else 7555 PetscBool match = PETSC_TRUE; 7556 #endif 7557 7558 PetscFunctionBegin; 7559 MatCheckProduct(mat, 1); 7560 #if defined(PETSC_HAVE_DEVICE) 7561 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7562 if (match) { /* we can always fallback to the CPU if requested */ 7563 switch (product->type) { 7564 case MATPRODUCT_AB: 7565 if (product->api_user) { 7566 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7567 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7568 PetscOptionsEnd(); 7569 } else { 7570 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7571 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7572 PetscOptionsEnd(); 7573 } 7574 break; 7575 case MATPRODUCT_AtB: 7576 if (product->api_user) { 7577 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7578 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7579 PetscOptionsEnd(); 7580 } else { 7581 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7582 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7583 PetscOptionsEnd(); 7584 } 7585 break; 7586 case MATPRODUCT_PtAP: 7587 if (product->api_user) { 7588 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7589 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7590 PetscOptionsEnd(); 7591 } else { 7592 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7593 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7594 PetscOptionsEnd(); 7595 } 7596 break; 7597 default: 7598 break; 7599 } 7600 match = (PetscBool)!usecpu; 7601 } 7602 #endif 7603 if (match) { 7604 switch (product->type) { 7605 case MATPRODUCT_AB: 7606 case MATPRODUCT_AtB: 7607 case MATPRODUCT_PtAP: 7608 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7609 break; 7610 default: 7611 break; 7612 } 7613 } 7614 /* fallback to MPIAIJ ops */ 7615 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7616 PetscFunctionReturn(PETSC_SUCCESS); 7617 } 7618 7619 /* 7620 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7621 7622 n - the number of block indices in cc[] 7623 cc - the block indices (must be large enough to contain the indices) 7624 */ 7625 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7626 { 7627 PetscInt cnt = -1, nidx, j; 7628 const PetscInt *idx; 7629 7630 PetscFunctionBegin; 7631 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7632 if (nidx) { 7633 cnt = 0; 7634 cc[cnt] = idx[0] / bs; 7635 for (j = 1; j < nidx; j++) { 7636 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7637 } 7638 } 7639 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7640 *n = cnt + 1; 7641 PetscFunctionReturn(PETSC_SUCCESS); 7642 } 7643 7644 /* 7645 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7646 7647 ncollapsed - the number of block indices 7648 collapsed - the block indices (must be large enough to contain the indices) 7649 */ 7650 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7651 { 7652 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7653 7654 PetscFunctionBegin; 7655 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7656 for (i = start + 1; i < start + bs; i++) { 7657 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 
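    /* Note (added for clarity): merge the block-column set of row i into the running set, then swap the work
       buffers so that cprev always holds the merged result entering the next iteration */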
7658 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7659 cprevtmp = cprev; 7660 cprev = merged; 7661 merged = cprevtmp; 7662 } 7663 *ncollapsed = nprev; 7664 if (collapsed) *collapsed = cprev; 7665 PetscFunctionReturn(PETSC_SUCCESS); 7666 } 7667 7668 /* 7669 This will eventually be folded into MatCreateGraph_AIJ() for optimal performance 7670 */ 7671 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) 7672 { 7673 PetscInt Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; 7674 Mat tGmat; 7675 MPI_Comm comm; 7676 const PetscScalar *vals; 7677 const PetscInt *idx; 7678 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0; 7679 MatScalar *AA; // this is checked in graph 7680 PetscBool isseqaij; 7681 Mat a, b, c; 7682 MatType jtype; 7683 7684 PetscFunctionBegin; 7685 PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm)); 7686 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij)); 7687 PetscCall(MatGetType(Gmat, &jtype)); 7688 PetscCall(MatCreate(comm, &tGmat)); 7689 PetscCall(MatSetType(tGmat, jtype)); 7690 7691 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7692 Also, if the matrix is symmetric, can we skip this 7693 operation? It can be very expensive on large matrices. */ 7694 7695 // global sizes 7696 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7697 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7698 nloc = Iend - Istart; 7699 PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz)); 7700 if (isseqaij) { 7701 a = Gmat; 7702 b = NULL; 7703 } else { 7704 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7705 a = d->A; 7706 b = d->B; 7707 garray = d->garray; 7708 } 7709 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7710 for (PetscInt row = 0; row < nloc; row++) { 7711 PetscCall(MatGetRow(a, row, &ncols, NULL, NULL)); 7712 d_nnz[row] = ncols; 7713 if (ncols > maxcols) maxcols = ncols; 7714 PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL)); 7715 } 7716 if (b) { 7717 for (PetscInt row = 0; row < nloc; row++) { 7718 PetscCall(MatGetRow(b, row, &ncols, NULL, NULL)); 7719 o_nnz[row] = ncols; 7720 if (ncols > maxcols) maxcols = ncols; 7721 PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL)); 7722 } 7723 } 7724 PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM)); 7725 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7726 PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz)); 7727 PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz)); 7728 PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7729 PetscCall(PetscFree2(d_nnz, o_nnz)); 7730 // 7731 PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); 7732 nnz0 = nnz1 = 0; 7733 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7734 for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) { 7735 PetscCall(MatGetRow(c, row, &ncols, &idx, &vals)); 7736 for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) { 7737 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7738 if (PetscRealPart(sv) > vfilter) { 7739 nnz1++; 7740 PetscInt cid = idx[jj] + Istart; //diag 7741 if (c != a) cid = garray[idx[jj]]; 7742 AA[ncol_row] = vals[jj]; 7743 AJ[ncol_row] = cid; 7744 ncol_row++; 7745 } 7746 } 7747 PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals)); 7748 PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES)); 7749 } 7750 } 7751 PetscCall(PetscFree2(AA, AJ)); 7752 PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY)); 
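  /* Note (added for clarity): MAT_NO_OFF_PROC_ENTRIES was set on tGmat above and every insertion targets a locally
     owned row, so this assembly should not need to communicate any stashed values */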
7753 PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7754 PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7755
7756 PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7757
7758 *filteredG = tGmat;
7759 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7760 PetscFunctionReturn(PETSC_SUCCESS);
7761 }
7762
7763 /*
7764 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7765
7766 Input Parameters:
7767 + Amat - matrix
7768 . symmetrize - make the result symmetric
7769 . scale - scale with diagonal
7770 - filter - drop graph entries whose absolute value is not above this threshold (a negative value means no filtering)
7771 Output Parameter:
7772 . a_Gmat - output scalar graph >= 0
7773
7774 */
7775 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7776 {
7777 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7778 MPI_Comm comm;
7779 Mat Gmat;
7780 PetscBool ismpiaij, isseqaij;
7781 Mat a, b, c;
7782 MatType jtype;
7783
7784 PetscFunctionBegin;
7785 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7786 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7787 PetscCall(MatGetSize(Amat, &MM, &NN));
7788 PetscCall(MatGetBlockSize(Amat, &bs));
7789 nloc = (Iend - Istart) / bs;
7790
7791 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7792 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7793 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7794
7795 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7796 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7797 implementation */
7798 if (bs > 1) {
7799 PetscCall(MatGetType(Amat, &jtype));
7800 PetscCall(MatCreate(comm, &Gmat));
7801 PetscCall(MatSetType(Gmat, jtype));
7802 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7803 PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7804 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7805 PetscInt *d_nnz, *o_nnz;
7806 MatScalar *aa, val, *AA;
7807 PetscInt *aj, *ai, *AJ, nc, nmax = 0;
7808 if (isseqaij) {
7809 a = Amat;
7810 b = NULL;
7811 } else {
7812 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7813 a = d->A;
7814 b = d->B;
7815 }
7816 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7817 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7818 for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7819 PetscInt *nnz = (c == a) ?
d_nnz : o_nnz; 7820 const PetscInt *cols; 7821 for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows 7822 PetscCall(MatGetRow(c, brow, &jj, &cols, NULL)); 7823 nnz[brow / bs] = jj / bs; 7824 if (jj % bs) ok = 0; 7825 if (cols) j0 = cols[0]; 7826 else j0 = -1; 7827 PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL)); 7828 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7829 for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks 7830 PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL)); 7831 if (jj % bs) ok = 0; 7832 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7833 if (nnz[brow / bs] != jj / bs) ok = 0; 7834 PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL)); 7835 } 7836 if (!ok) { 7837 PetscCall(PetscFree2(d_nnz, o_nnz)); 7838 goto old_bs; 7839 } 7840 } 7841 } 7842 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7843 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7844 PetscCall(PetscFree2(d_nnz, o_nnz)); 7845 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7846 // diag 7847 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7848 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7849 ai = aseq->i; 7850 n = ai[brow + 1] - ai[brow]; 7851 aj = aseq->j + ai[brow]; 7852 for (int k = 0; k < n; k += bs) { // block columns 7853 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7854 val = 0; 7855 for (int ii = 0; ii < bs; ii++) { // rows in block 7856 aa = aseq->a + ai[brow + ii] + k; 7857 for (int jj = 0; jj < bs; jj++) { // columns in block 7858 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7859 } 7860 } 7861 AA[k / bs] = val; 7862 } 7863 grow = Istart / bs + brow / bs; 7864 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7865 } 7866 // off-diag 7867 if (ismpiaij) { 7868 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7869 const PetscScalar *vals; 7870 const PetscInt *cols, *garray = aij->garray; 7871 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7872 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7873 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7874 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7875 AA[k / bs] = 0; 7876 AJ[cidx] = garray[cols[k]] / bs; 7877 } 7878 nc = ncols / bs; 7879 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7880 for (int ii = 0; ii < bs; ii++) { // rows in block 7881 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7882 for (int k = 0; k < ncols; k += bs) { 7883 for (int jj = 0; jj < bs; jj++) { // cols in block 7884 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7885 } 7886 } 7887 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7888 } 7889 grow = Istart / bs + brow / bs; 7890 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7891 } 7892 } 7893 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7894 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7895 PetscCall(PetscFree2(AA, AJ)); 7896 } else { 7897 const PetscScalar *vals; 7898 const PetscInt *idx; 7899 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7900 old_bs: 7901 /* 7902 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7903 */ 7904 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7905 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7906 if (isseqaij) { 7907 PetscInt max_d_nnz; 7908 /* 7909 Determine exact preallocation count for (sequential) scalar matrix 7910 */ 7911 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7912 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7913 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7914 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7915 PetscCall(PetscFree3(w0, w1, w2)); 7916 } else if (ismpiaij) { 7917 Mat Daij, Oaij; 7918 const PetscInt *garray; 7919 PetscInt max_d_nnz; 7920 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7921 /* 7922 Determine exact preallocation count for diagonal block portion of scalar matrix 7923 */ 7924 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7925 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7926 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7927 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7928 PetscCall(PetscFree3(w0, w1, w2)); 7929 /* 7930 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7931 */ 7932 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7933 o_nnz[jj] = 0; 7934 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7935 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7936 o_nnz[jj] += ncols; 7937 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7938 } 7939 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7940 } 7941 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7942 /* get scalar copy (norms) of matrix */ 7943 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7944 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7945 PetscCall(PetscFree2(d_nnz, o_nnz)); 7946 for (Ii = Istart; Ii < Iend; Ii++) { 7947 PetscInt dest_row = Ii / bs; 7948 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7949 for (jj = 0; jj < ncols; jj++) { 7950 PetscInt dest_col = idx[jj] / bs; 7951 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7952 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7953 } 7954 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7955 } 7956 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7957 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7958 } 7959 } else { 7960 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7961 else { 7962 Gmat = Amat; 7963 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7964 } 7965 if (isseqaij) { 7966 a = Gmat; 7967 b = NULL; 7968 } else { 7969 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7970 a = d->A; 7971 b = d->B; 7972 } 7973 if (filter >= 0 || scale) { 7974 /* take absolute value of each entry */ 7975 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7976 MatInfo info; 7977 PetscScalar *avals; 7978 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7979 PetscCall(MatSeqAIJGetArray(c, &avals)); 7980 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7981 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7982 } 7983 } 7984 } 7985 if (symmetrize) { 7986 PetscBool isset, issym; 7987 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7988 if (!isset || !issym) { 7989 Mat matTrans; 7990 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7991 PetscCall(MatAXPY(Gmat, 1.0, matTrans, 
Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7992 PetscCall(MatDestroy(&matTrans)); 7993 } 7994 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7995 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7996 if (scale) { 7997 /* scale c for all diagonal values = 1 or -1 */ 7998 Vec diag; 7999 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8000 PetscCall(MatGetDiagonal(Gmat, diag)); 8001 PetscCall(VecReciprocal(diag)); 8002 PetscCall(VecSqrtAbs(diag)); 8003 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8004 PetscCall(VecDestroy(&diag)); 8005 } 8006 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8007 8008 if (filter >= 0) { 8009 Mat Fmat = NULL; /* some silly compiler needs this */ 8010 8011 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 8012 PetscCall(MatDestroy(&Gmat)); 8013 Gmat = Fmat; 8014 } 8015 *a_Gmat = Gmat; 8016 PetscFunctionReturn(PETSC_SUCCESS); 8017 } 8018 8019 /* 8020 Special version for direct calls from Fortran 8021 */ 8022 #include <petsc/private/fortranimpl.h> 8023 8024 /* Change these macros so can be used in void function */ 8025 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8026 #undef PetscCall 8027 #define PetscCall(...) \ 8028 do { \ 8029 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8030 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8031 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8032 return; \ 8033 } \ 8034 } while (0) 8035 8036 #undef SETERRQ 8037 #define SETERRQ(comm, ierr, ...) \ 8038 do { \ 8039 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8040 return; \ 8041 } while (0) 8042 8043 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8044 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8045 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8046 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8047 #else 8048 #endif 8049 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8050 { 8051 Mat mat = *mmat; 8052 PetscInt m = *mm, n = *mn; 8053 InsertMode addv = *maddv; 8054 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8055 PetscScalar value; 8056 8057 MatCheckPreallocated(mat, 1); 8058 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8059 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8060 { 8061 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8062 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8063 PetscBool roworiented = aij->roworiented; 8064 8065 /* Some Variables required in the macro */ 8066 Mat A = aij->A; 8067 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8068 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8069 MatScalar *aa; 8070 PetscBool ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8071 Mat B = aij->B; 8072 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8073 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8074 MatScalar *ba; 8075 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8076 * cannot use "#if defined" inside a macro. 
*/ 8077 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8078 8079 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8080 PetscInt nonew = a->nonew; 8081 MatScalar *ap1, *ap2; 8082 8083 PetscFunctionBegin; 8084 PetscCall(MatSeqAIJGetArray(A, &aa)); 8085 PetscCall(MatSeqAIJGetArray(B, &ba)); 8086 for (i = 0; i < m; i++) { 8087 if (im[i] < 0) continue; 8088 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8089 if (im[i] >= rstart && im[i] < rend) { 8090 row = im[i] - rstart; 8091 lastcol1 = -1; 8092 rp1 = aj + ai[row]; 8093 ap1 = aa + ai[row]; 8094 rmax1 = aimax[row]; 8095 nrow1 = ailen[row]; 8096 low1 = 0; 8097 high1 = nrow1; 8098 lastcol2 = -1; 8099 rp2 = bj + bi[row]; 8100 ap2 = ba + bi[row]; 8101 rmax2 = bimax[row]; 8102 nrow2 = bilen[row]; 8103 low2 = 0; 8104 high2 = nrow2; 8105 8106 for (j = 0; j < n; j++) { 8107 if (roworiented) value = v[i * n + j]; 8108 else value = v[i + j * m]; 8109 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8110 if (in[j] >= cstart && in[j] < cend) { 8111 col = in[j] - cstart; 8112 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8113 } else if (in[j] < 0) continue; 8114 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8115 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8116 } else { 8117 if (mat->was_assembled) { 8118 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8119 #if defined(PETSC_USE_CTABLE) 8120 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8121 col--; 8122 #else 8123 col = aij->colmap[in[j]] - 1; 8124 #endif 8125 if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 8126 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8127 col = in[j]; 8128 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8129 B = aij->B; 8130 b = (Mat_SeqAIJ *)B->data; 8131 bimax = b->imax; 8132 bi = b->i; 8133 bilen = b->ilen; 8134 bj = b->j; 8135 rp2 = bj + bi[row]; 8136 ap2 = ba + bi[row]; 8137 rmax2 = bimax[row]; 8138 nrow2 = bilen[row]; 8139 low2 = 0; 8140 high2 = nrow2; 8141 bm = aij->B->rmap->n; 8142 ba = b->a; 8143 inserted = PETSC_FALSE; 8144 } 8145 } else col = in[j]; 8146 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8147 } 8148 } 8149 } else if (!aij->donotstash) { 8150 if (roworiented) { 8151 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8152 } else { 8153 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8154 } 8155 } 8156 } 8157 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8158 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8159 } 8160 PetscFunctionReturnVoid(); 8161 } 8162 8163 /* Undefining these here since they were redefined from their original definition above! No 8164 * other PETSc functions should be defined past this point, as it is impossible to recover the 8165 * original definitions */ 8166 #undef PetscCall 8167 #undef SETERRQ 8168
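/* Usage note (added; illustrative only): matsetvaluesmpiaij_() above is the symbol a Fortran compiler typically
   generates for a call such as
       call MatSetValuesMPIAIJ(mat, m, idxm, n, idxn, v, ADD_VALUES, ierr)
   (the exact Fortran-side name depends on the name-mangling convention selected by the macros above). Because the
   C routine returns void, errors can only be reported through the trailing ierr argument, which is why PetscCall()
   and SETERRQ() were temporarily redefined to assign to *_ierr and return. */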