1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 10 #define TYPE AIJ 11 #define TYPE_AIJ 12 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 13 #undef TYPE 14 #undef TYPE_AIJ 15 16 static PetscErrorCode MatReset_MPIAIJ(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 19 20 PetscFunctionBegin; 21 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 22 PetscCall(MatStashDestroy_Private(&mat->stash)); 23 PetscCall(VecDestroy(&aij->diag)); 24 PetscCall(MatDestroy(&aij->A)); 25 PetscCall(MatDestroy(&aij->B)); 26 #if defined(PETSC_USE_CTABLE) 27 PetscCall(PetscHMapIDestroy(&aij->colmap)); 28 #else 29 PetscCall(PetscFree(aij->colmap)); 30 #endif 31 PetscCall(PetscFree(aij->garray)); 32 PetscCall(VecDestroy(&aij->lvec)); 33 PetscCall(VecScatterDestroy(&aij->Mvctx)); 34 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 35 PetscCall(PetscFree(aij->ld)); 36 PetscFunctionReturn(PETSC_SUCCESS); 37 } 38 39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 42 /* Save the nonzero states of the component matrices because those are what are used to determine 43 the nonzero state of mat */ 44 PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate; 45 46 PetscFunctionBegin; 47 PetscCall(MatReset_MPIAIJ(mat)); 48 PetscCall(MatSetUp_MPI_Hash(mat)); 49 aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate; 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 54 { 55 PetscFunctionBegin; 56 PetscCall(MatReset_MPIAIJ(mat)); 57 58 PetscCall(PetscFree(mat->data)); 59 60 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 61 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 62 63 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 74 #if defined(PETSC_HAVE_CUDA) 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 76 #endif 77 #if defined(PETSC_HAVE_HIP) 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 79 #endif 80 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 139 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 140 for communicators controlling multiple processes. 
It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also
  automatically switches over to using inodes when enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
*/ 186 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 187 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 192 { 193 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 194 195 PetscFunctionBegin; 196 if (mat->A) { 197 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 198 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 199 } 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 204 { 205 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 207 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 208 const PetscInt *ia, *ib; 209 const MatScalar *aa, *bb, *aav, *bav; 210 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 211 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 212 213 PetscFunctionBegin; 214 *keptrows = NULL; 215 216 ia = a->i; 217 ib = b->i; 218 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 220 for (i = 0; i < m; i++) { 221 na = ia[i + 1] - ia[i]; 222 nb = ib[i + 1] - ib[i]; 223 if (!na && !nb) { 224 cnt++; 225 goto ok1; 226 } 227 aa = aav + ia[i]; 228 for (j = 0; j < na; j++) { 229 if (aa[j] != 0.0) goto ok1; 230 } 231 bb = PetscSafePointerPlusOffset(bav, ib[i]); 232 for (j = 0; j < nb; j++) { 233 if (bb[j] != 0.0) goto ok1; 234 } 235 cnt++; 236 ok1:; 237 } 238 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 239 if (!n0rows) { 240 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 241 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 242 PetscFunctionReturn(PETSC_SUCCESS); 243 } 244 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 245 cnt = 0; 246 for (i = 0; i < m; i++) { 247 na = ia[i + 1] - ia[i]; 248 nb = ib[i + 1] - ib[i]; 249 if (!na && !nb) continue; 250 aa = aav + ia[i]; 251 for (j = 0; j < na; j++) { 252 if (aa[j] != 0.0) { 253 rows[cnt++] = rstart + i; 254 goto ok2; 255 } 256 } 257 bb = PetscSafePointerPlusOffset(bav, ib[i]); 258 for (j = 0; j < nb; j++) { 259 if (bb[j] != 0.0) { 260 rows[cnt++] = rstart + i; 261 goto ok2; 262 } 263 } 264 ok2:; 265 } 266 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 267 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 268 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 275 PetscBool cong; 276 277 PetscFunctionBegin; 278 PetscCall(MatHasCongruentLayouts(Y, &cong)); 279 if (Y->assembled && cong) { 280 PetscCall(MatDiagonalSet(aij->A, D, is)); 281 } else { 282 PetscCall(MatDiagonalSet_Default(Y, D, is)); 283 } 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 288 { 289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 290 PetscInt i, rstart, nrows, *rows; 291 292 PetscFunctionBegin; 293 *zrows = NULL; 294 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 295 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 296 for (i = 0; i < nrows; i++) rows[i] += rstart; 297 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal 
*reductions) 302 { 303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 304 PetscInt i, m, n, *garray = aij->garray; 305 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 306 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 307 PetscReal *work; 308 const PetscScalar *dummy; 309 310 PetscFunctionBegin; 311 PetscCall(MatGetSize(A, &m, &n)); 312 PetscCall(PetscCalloc1(n, &work)); 313 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 314 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 315 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 316 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 317 if (type == NORM_2) { 318 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 319 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 320 } else if (type == NORM_1) { 321 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 322 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 323 } else if (type == NORM_INFINITY) { 324 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 325 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 326 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 327 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 328 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 330 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 331 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 332 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 333 if (type == NORM_INFINITY) { 334 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 335 } else { 336 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 337 } 338 PetscCall(PetscFree(work)); 339 if (type == NORM_2) { 340 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 341 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 342 for (i = 0; i < n; i++) reductions[i] /= m; 343 } 344 PetscFunctionReturn(PETSC_SUCCESS); 345 } 346 347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 348 { 349 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 350 IS sis, gis; 351 const PetscInt *isis, *igis; 352 PetscInt n, *iis, nsis, ngis, rstart, i; 353 354 PetscFunctionBegin; 355 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 356 PetscCall(MatFindNonzeroRows(a->B, &gis)); 357 PetscCall(ISGetSize(gis, &ngis)); 358 PetscCall(ISGetSize(sis, &nsis)); 359 PetscCall(ISGetIndices(sis, &isis)); 360 PetscCall(ISGetIndices(gis, &igis)); 361 362 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 363 PetscCall(PetscArraycpy(iis, igis, ngis)); 364 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 365 n = ngis + nsis; 366 
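  /* Merge done: sort the combined index list, drop duplicates, shift to global row numbering, and build the output IS */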
PetscCall(PetscSortRemoveDupsInt(&n, iis)); 367 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 368 for (i = 0; i < n; i++) iis[i] += rstart; 369 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 370 371 PetscCall(ISRestoreIndices(sis, &isis)); 372 PetscCall(ISRestoreIndices(gis, &igis)); 373 PetscCall(ISDestroy(&sis)); 374 PetscCall(ISDestroy(&gis)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 /* 379 Local utility routine that creates a mapping from the global column 380 number to the local number in the off-diagonal part of the local 381 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 382 a slightly higher hash table cost; without it it is not scalable (each processor 383 has an order N integer array but is fast to access. 384 */ 385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 386 { 387 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 388 PetscInt n = aij->B->cmap->n, i; 389 390 PetscFunctionBegin; 391 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 392 #if defined(PETSC_USE_CTABLE) 393 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 394 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 395 #else 396 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 397 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 398 #endif 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 403 do { \ 404 if (col <= lastcol1) low1 = 0; \ 405 else high1 = nrow1; \ 406 lastcol1 = col; \ 407 while (high1 - low1 > 5) { \ 408 t = (low1 + high1) / 2; \ 409 if (rp1[t] > col) high1 = t; \ 410 else low1 = t; \ 411 } \ 412 for (_i = low1; _i < high1; _i++) { \ 413 if (rp1[_i] > col) break; \ 414 if (rp1[_i] == col) { \ 415 if (addv == ADD_VALUES) { \ 416 ap1[_i] += value; \ 417 /* Not sure LogFlops will slow down the code or not */ \ 418 (void)PetscLogFlops(1.0); \ 419 } else ap1[_i] = value; \ 420 goto a_noinsert; \ 421 } \ 422 } \ 423 if (value == 0.0 && ignorezeroentries && row != col) { \ 424 low1 = 0; \ 425 high1 = nrow1; \ 426 goto a_noinsert; \ 427 } \ 428 if (nonew == 1) { \ 429 low1 = 0; \ 430 high1 = nrow1; \ 431 goto a_noinsert; \ 432 } \ 433 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 434 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 435 N = nrow1++ - 1; \ 436 a->nz++; \ 437 high1++; \ 438 /* shift up all the later entries in this row */ \ 439 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 440 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 441 rp1[_i] = col; \ 442 ap1[_i] = value; \ 443 a_noinsert:; \ 444 ailen[row] = nrow1; \ 445 } while (0) 446 447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 448 do { \ 449 if (col <= lastcol2) low2 = 0; \ 450 else high2 = nrow2; \ 451 lastcol2 = col; \ 452 while (high2 - low2 > 5) { \ 453 t = (low2 + high2) / 2; \ 454 if (rp2[t] > col) high2 = t; \ 455 else low2 = t; \ 456 } \ 457 for (_i = low2; _i < high2; _i++) { \ 458 if (rp2[_i] > col) break; \ 459 if (rp2[_i] == col) { \ 460 if (addv == ADD_VALUES) { \ 461 ap2[_i] += value; \ 462 (void)PetscLogFlops(1.0); \ 463 } else ap2[_i] = value; \ 464 goto b_noinsert; \ 465 } \ 466 
} \ 467 if (value == 0.0 && ignorezeroentries) { \ 468 low2 = 0; \ 469 high2 = nrow2; \ 470 goto b_noinsert; \ 471 } \ 472 if (nonew == 1) { \ 473 low2 = 0; \ 474 high2 = nrow2; \ 475 goto b_noinsert; \ 476 } \ 477 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 478 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 479 N = nrow2++ - 1; \ 480 b->nz++; \ 481 high2++; \ 482 /* shift up all the later entries in this row */ \ 483 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 484 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 485 rp2[_i] = col; \ 486 ap2[_i] = value; \ 487 b_noinsert:; \ 488 bilen[row] = nrow2; \ 489 } while (0) 490 491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 492 { 493 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 494 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 495 PetscInt l, *garray = mat->garray, diag; 496 PetscScalar *aa, *ba; 497 498 PetscFunctionBegin; 499 /* code only works for square matrices A */ 500 501 /* find size of row to the left of the diagonal part */ 502 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 503 row = row - diag; 504 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 505 if (garray[b->j[b->i[row] + l]] > diag) break; 506 } 507 if (l) { 508 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 509 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 510 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 511 } 512 513 /* diagonal part */ 514 if (a->i[row + 1] - a->i[row]) { 515 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 516 PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 517 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 518 } 519 520 /* right of diagonal part */ 521 if (b->i[row + 1] - b->i[row] - l) { 522 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 523 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 524 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 525 } 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 532 PetscScalar value = 0.0; 533 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 540 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 541 PetscBool ignorezeroentries = a->ignorezeroentries; 542 Mat B = aij->B; 543 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 544 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 545 MatScalar *aa, *ba; 546 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 547 PetscInt nonew; 548 MatScalar *ap1, *ap2; 549 550 PetscFunctionBegin; 551 PetscCall(MatSeqAIJGetArray(A, &aa)); 552 PetscCall(MatSeqAIJGetArray(B, &ba)); 553 for (i = 0; i < m; i++) { 554 if (im[i] < 0) continue; 555 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: 
row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 lastcol1 = -1; 559 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 560 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 561 rmax1 = aimax[row]; 562 nrow1 = ailen[row]; 563 low1 = 0; 564 high1 = nrow1; 565 lastcol2 = -1; 566 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 567 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 568 rmax2 = bimax[row]; 569 nrow2 = bilen[row]; 570 low2 = 0; 571 high2 = nrow2; 572 573 for (j = 0; j < n; j++) { 574 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 576 if (in[j] >= cstart && in[j] < cend) { 577 col = in[j] - cstart; 578 nonew = a->nonew; 579 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 580 } else if (in[j] < 0) { 581 continue; 582 } else { 583 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 584 if (mat->was_assembled) { 585 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 586 #if defined(PETSC_USE_CTABLE) 587 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 593 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */ 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ *)B->data; 598 bimax = b->imax; 599 bi = b->i; 600 bilen = b->ilen; 601 bj = b->j; 602 ba = b->a; 603 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 604 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 605 rmax2 = bimax[row]; 606 nrow2 = bilen[row]; 607 low2 = 0; 608 high2 = nrow2; 609 bm = aij->B->rmap->n; 610 ba = b->a; 611 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 612 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 613 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 614 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 615 } 616 } else col = in[j]; 617 nonew = b->nonew; 618 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 619 } 620 } 621 } else { 622 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 623 if (!aij->donotstash) { 624 mat->assembled = PETSC_FALSE; 625 if (roworiented) { 626 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 627 } else { 628 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 629 } 630 } 631 } 632 } 633 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. 
   But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be increasing and the values in mat_j have to be sorted within each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be increasing and the values in mat_j have to be sorted within each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 698 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 699 PetscScalar *aa = a->a, *ba = b->a; 700 701 PetscFunctionBegin; 702 /* Iterate over all rows of the matrix */ 703 for (j = 0; j < am; j++) { 704 dnz_row = onz_row = 0; 705 rowstart_offd = full_offd_i[j]; 706 rowstart_diag = full_diag_i[j]; 707 /* Iterate over all non-zero columns of the current row */ 708 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 709 /* If column is in the diagonal */ 710 if (mat_j[col] >= cstart && mat_j[col] < cend) { 711 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 712 aa[rowstart_diag + dnz_row] = mat_a[col]; 713 dnz_row++; 714 } else { /* off-diagonal entries */ 715 bj[rowstart_offd + onz_row] = mat_j[col]; 716 ba[rowstart_offd + onz_row] = mat_a[col]; 717 onz_row++; 718 } 719 } 720 ailen[j] = dnz_row; 721 bilen[j] = onz_row; 722 } 723 PetscFunctionReturn(PETSC_SUCCESS); 724 } 725 726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 729 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 730 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 731 732 PetscFunctionBegin; 733 for (i = 0; i < m; i++) { 734 if (idxm[i] < 0) continue; /* negative row */ 735 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 736 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 737 row = idxm[i] - rstart; 738 for (j = 0; j < n; j++) { 739 if (idxn[j] < 0) continue; /* negative column */ 740 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 741 if (idxn[j] >= cstart && idxn[j] < cend) { 742 col = idxn[j] - cstart; 743 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 744 } else { 745 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 746 #if defined(PETSC_USE_CTABLE) 747 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 753 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 754 } 755 } 756 } 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 761 { 762 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 763 PetscInt nstash, reallocs; 764 765 PetscFunctionBegin; 766 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 767 768 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 769 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 770 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 771 PetscFunctionReturn(PETSC_SUCCESS); 772 } 773 774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 775 { 776 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 777 PetscMPIInt n; 778 PetscInt i, j, rstart, ncols, flg; 779 PetscInt *row, *col; 780 PetscBool all_assembled; 781 PetscScalar *val; 782 783 /* do not use 'b = 
(Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 784 785 PetscFunctionBegin; 786 if (!aij->donotstash && !mat->nooffprocentries) { 787 while (1) { 788 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 789 if (!flg) break; 790 791 for (i = 0; i < n;) { 792 /* Now identify the consecutive vals belonging to the same row */ 793 for (j = i, rstart = row[j]; j < n; j++) { 794 if (row[j] != rstart) break; 795 } 796 if (j < n) ncols = j - i; 797 else ncols = n - i; 798 /* Now assemble all these values with a single function call */ 799 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 800 i = j; 801 } 802 } 803 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 804 } 805 #if defined(PETSC_HAVE_DEVICE) 806 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 807 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 808 if (mat->boundtocpu) { 809 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 810 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 811 } 812 #endif 813 PetscCall(MatAssemblyBegin(aij->A, mode)); 814 PetscCall(MatAssemblyEnd(aij->A, mode)); 815 816 /* determine if any process has disassembled, if so we must 817 also disassemble ourself, in order that we may reassemble. */ 818 /* 819 if nonzero structure of submatrix B cannot change then we know that 820 no process disassembled thus we can skip this stuff 821 */ 822 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 823 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 824 if (mat->was_assembled && !all_assembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 825 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 826 } 827 } 828 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 829 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 830 #if defined(PETSC_HAVE_DEVICE) 831 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 832 #endif 833 PetscCall(MatAssemblyBegin(aij->B, mode)); 834 PetscCall(MatAssemblyEnd(aij->B, mode)); 835 836 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 837 838 aij->rowvalues = NULL; 839 840 PetscCall(VecDestroy(&aij->diag)); 841 842 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 843 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 844 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 845 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 846 } 847 #if defined(PETSC_HAVE_DEVICE) 848 mat->offloadmask = PETSC_OFFLOAD_BOTH; 849 #endif 850 PetscFunctionReturn(PETSC_SUCCESS); 851 } 852 853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 854 { 855 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 856 857 PetscFunctionBegin; 858 PetscCall(MatZeroEntries(l->A)); 859 PetscCall(MatZeroEntries(l->B)); 860 PetscFunctionReturn(PETSC_SUCCESS); 861 } 862 863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 864 { 865 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 866 PetscInt *lrows; 867 PetscInt r, len; 868 PetscBool cong; 869 
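  /* Zero the locally owned rows listed in 'rows': map them to local indices, optionally fix the right-hand side b from x,
     zero the rows of the diagonal (A) and off-diagonal (B) blocks, and re-insert the diagonal entries when 'diag' is nonzero */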
870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 873 PetscCall(MatHasCongruentLayouts(A, &cong)); 874 /* fix right-hand side if needed */ 875 if (x && b) { 876 const PetscScalar *xx; 877 PetscScalar *bb; 878 879 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 880 PetscCall(VecGetArrayRead(x, &xx)); 881 PetscCall(VecGetArray(b, &bb)); 882 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 883 PetscCall(VecRestoreArrayRead(x, &xx)); 884 PetscCall(VecRestoreArray(b, &bb)); 885 } 886 887 if (diag != 0.0 && cong) { 888 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 889 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 890 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 891 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 892 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 893 PetscInt nnwA, nnwB; 894 PetscBool nnzA, nnzB; 895 896 nnwA = aijA->nonew; 897 nnwB = aijB->nonew; 898 nnzA = aijA->keepnonzeropattern; 899 nnzB = aijB->keepnonzeropattern; 900 if (!nnzA) { 901 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 902 aijA->nonew = 0; 903 } 904 if (!nnzB) { 905 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 906 aijB->nonew = 0; 907 } 908 /* Must zero here before the next loop */ 909 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 910 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 911 for (r = 0; r < len; ++r) { 912 const PetscInt row = lrows[r] + A->rmap->rstart; 913 if (row >= A->cmap->N) continue; 914 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 915 } 916 aijA->nonew = nnwA; 917 aijB->nonew = nnwB; 918 } else { 919 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 920 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 921 } 922 PetscCall(PetscFree(lrows)); 923 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 924 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 928 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 929 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 930 } 931 PetscFunctionReturn(PETSC_SUCCESS); 932 } 933 934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 935 { 936 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 937 PetscInt n = A->rmap->n; 938 PetscInt i, j, r, m, len = 0; 939 PetscInt *lrows, *owners = A->rmap->range; 940 PetscMPIInt p = 0; 941 PetscSFNode *rrows; 942 PetscSF sf; 943 const PetscScalar *xx; 944 PetscScalar *bb, *mask, *aij_a; 945 Vec xmask, lmask; 946 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 947 const PetscInt *aj, *ii, *ridx; 948 PetscScalar *aa; 949 950 PetscFunctionBegin; 951 /* Create SF where leaves are input rows and roots are owned rows */ 952 PetscCall(PetscMalloc1(n, &lrows)); 953 for (r = 0; r < n; ++r) lrows[r] = -1; 954 PetscCall(PetscMalloc1(N, &rrows)); 955 for (r = 0; r < N; ++r) { 956 const PetscInt idx = 
rows[r]; 957 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 958 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 959 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 960 } 961 rrows[r].rank = p; 962 rrows[r].index = rows[r] - owners[p]; 963 } 964 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 965 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 966 /* Collect flags for rows to be zeroed */ 967 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 968 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 969 PetscCall(PetscSFDestroy(&sf)); 970 /* Compress and put in row numbers */ 971 for (r = 0; r < n; ++r) 972 if (lrows[r] >= 0) lrows[len++] = r; 973 /* zero diagonal part of matrix */ 974 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 975 /* handle off-diagonal part of matrix */ 976 PetscCall(MatCreateVecs(A, &xmask, NULL)); 977 PetscCall(VecDuplicate(l->lvec, &lmask)); 978 PetscCall(VecGetArray(xmask, &bb)); 979 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 980 PetscCall(VecRestoreArray(xmask, &bb)); 981 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 982 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 983 PetscCall(VecDestroy(&xmask)); 984 if (x && b) { /* this code is buggy when the row and column layout don't match */ 985 PetscBool cong; 986 987 PetscCall(MatHasCongruentLayouts(A, &cong)); 988 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 989 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 990 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 991 PetscCall(VecGetArrayRead(l->lvec, &xx)); 992 PetscCall(VecGetArray(b, &bb)); 993 } 994 PetscCall(VecGetArray(lmask, &mask)); 995 /* remove zeroed rows of off-diagonal matrix */ 996 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 997 ii = aij->i; 998 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 999 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1000 if (aij->compressedrow.use) { 1001 m = aij->compressedrow.nrows; 1002 ii = aij->compressedrow.i; 1003 ridx = aij->compressedrow.rindex; 1004 for (i = 0; i < m; i++) { 1005 n = ii[i + 1] - ii[i]; 1006 aj = aij->j + ii[i]; 1007 aa = aij_a + ii[i]; 1008 1009 for (j = 0; j < n; j++) { 1010 if (PetscAbsScalar(mask[*aj])) { 1011 if (b) bb[*ridx] -= *aa * xx[*aj]; 1012 *aa = 0.0; 1013 } 1014 aa++; 1015 aj++; 1016 } 1017 ridx++; 1018 } 1019 } else { /* do not use compressed row format */ 1020 m = l->B->rmap->n; 1021 for (i = 0; i < m; i++) { 1022 n = ii[i + 1] - ii[i]; 1023 aj = aij->j + ii[i]; 1024 aa = aij_a + ii[i]; 1025 for (j = 0; j < n; j++) { 1026 if (PetscAbsScalar(mask[*aj])) { 1027 if (b) bb[i] -= *aa * xx[*aj]; 1028 *aa = 0.0; 1029 } 1030 aa++; 1031 aj++; 1032 } 1033 } 1034 } 1035 if (x && b) { 1036 PetscCall(VecRestoreArray(b, &bb)); 1037 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1038 } 1039 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1040 PetscCall(VecRestoreArray(lmask, &mask)); 1041 PetscCall(VecDestroy(&lmask)); 1042 PetscCall(PetscFree(lrows)); 1043 1044 /* only change matrix nonzero state if 
pattern was allowed to be changed */ 1045 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1046 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1047 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1048 } 1049 PetscFunctionReturn(PETSC_SUCCESS); 1050 } 1051 1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1055 PetscInt nt; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 PetscCall(VecGetLocalSize(xx, &nt)); 1060 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1061 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1062 PetscUseTypeMethod(a->A, mult, xx, yy); 1063 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1071 1072 PetscFunctionBegin; 1073 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1074 PetscFunctionReturn(PETSC_SUCCESS); 1075 } 1076 1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1078 { 1079 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1080 VecScatter Mvctx = a->Mvctx; 1081 1082 PetscFunctionBegin; 1083 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1084 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1085 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1086 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1087 PetscFunctionReturn(PETSC_SUCCESS); 1088 } 1089 1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1091 { 1092 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1093 1094 PetscFunctionBegin; 1095 /* do nondiagonal part */ 1096 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1097 /* do local part */ 1098 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1099 /* add partial results together */ 1100 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1101 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1102 PetscFunctionReturn(PETSC_SUCCESS); 1103 } 1104 1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1106 { 1107 MPI_Comm comm; 1108 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1109 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1110 IS Me, Notme; 1111 PetscInt M, N, first, last, *notme, i; 1112 PetscBool lf; 1113 PetscMPIInt size; 1114 1115 PetscFunctionBegin; 1116 /* Easy test: symmetric diagonal block */ 1117 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1118 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1119 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1120 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1121 PetscCallMPI(MPI_Comm_size(comm, &size)); 1122 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1123 1124 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
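     Each process extracts its owned rows of Amat restricted to the columns it does not own (Aoff) and the
     corresponding non-owned rows of Bmat restricted to its owned columns (Boff), then tests whether Aoff and Boff
     are transposes of each other.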
*/ 1125 PetscCall(MatGetSize(Amat, &M, &N)); 1126 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1127 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1128 for (i = 0; i < first; i++) notme[i] = i; 1129 for (i = last; i < M; i++) notme[i - last + first] = i; 1130 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1131 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1132 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1133 Aoff = Aoffs[0]; 1134 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1135 Boff = Boffs[0]; 1136 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1137 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1138 PetscCall(MatDestroyMatrices(1, &Boffs)); 1139 PetscCall(ISDestroy(&Me)); 1140 PetscCall(ISDestroy(&Notme)); 1141 PetscCall(PetscFree(notme)); 1142 PetscFunctionReturn(PETSC_SUCCESS); 1143 } 1144 1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1148 1149 PetscFunctionBegin; 1150 /* do nondiagonal part */ 1151 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1152 /* do local part */ 1153 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1154 /* add partial results together */ 1155 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1156 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1157 PetscFunctionReturn(PETSC_SUCCESS); 1158 } 1159 1160 /* 1161 This only works correctly for square matrices where the subblock A->A is the 1162 diagonal block 1163 */ 1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1165 { 1166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1167 1168 PetscFunctionBegin; 1169 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1170 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1171 PetscCall(MatGetDiagonal(a->A, v)); 1172 PetscFunctionReturn(PETSC_SUCCESS); 1173 } 1174 1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1176 { 1177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1178 1179 PetscFunctionBegin; 1180 PetscCall(MatScale(a->A, aa)); 1181 PetscCall(MatScale(a->B, aa)); 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1186 { 1187 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1188 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1189 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1190 const PetscInt *garray = aij->garray; 1191 const PetscScalar *aa, *ba; 1192 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1193 PetscInt64 nz, hnz; 1194 PetscInt *rowlens; 1195 PetscInt *colidxs; 1196 PetscScalar *matvals; 1197 PetscMPIInt rank; 1198 1199 PetscFunctionBegin; 1200 PetscCall(PetscViewerSetUp(viewer)); 1201 1202 M = mat->rmap->N; 1203 N = mat->cmap->N; 1204 m = mat->rmap->n; 1205 rs = mat->rmap->rstart; 1206 cs = mat->cmap->rstart; 1207 nz = A->nz + B->nz; 1208 1209 /* write matrix header */ 1210 header[0] = MAT_FILE_CLASSID; 1211 header[1] = M; 1212 header[2] = N; 1213 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1214 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1215 if (rank == 0) 
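    /* hnz was reduced onto rank 0 only, so only rank 0 can fill in the last header entry */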
PetscCall(PetscIntCast(hnz, &header[3])); 1216 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1217 1218 /* fill in and store row lengths */ 1219 PetscCall(PetscMalloc1(m, &rowlens)); 1220 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1221 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1222 PetscCall(PetscFree(rowlens)); 1223 1224 /* fill in and store column indices */ 1225 PetscCall(PetscMalloc1(nz, &colidxs)); 1226 for (cnt = 0, i = 0; i < m; i++) { 1227 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1228 if (garray[B->j[jb]] > cs) break; 1229 colidxs[cnt++] = garray[B->j[jb]]; 1230 } 1231 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1232 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1233 } 1234 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1235 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1236 PetscCall(PetscFree(colidxs)); 1237 1238 /* fill in and store nonzero values */ 1239 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1240 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1241 PetscCall(PetscMalloc1(nz, &matvals)); 1242 for (cnt = 0, i = 0; i < m; i++) { 1243 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 matvals[cnt++] = ba[jb]; 1246 } 1247 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1248 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1249 } 1250 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1251 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1252 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1254 PetscCall(PetscFree(matvals)); 1255 1256 /* write block size option to the viewer's .info file */ 1257 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 1261 #include <petscdraw.h> 1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1263 { 1264 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1265 PetscMPIInt rank = aij->rank, size = aij->size; 1266 PetscBool isdraw, iascii, isbinary; 1267 PetscViewer sviewer; 1268 PetscViewerFormat format; 1269 1270 PetscFunctionBegin; 1271 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1272 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1273 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1274 if (iascii) { 1275 PetscCall(PetscViewerGetFormat(viewer, &format)); 1276 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1277 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1278 PetscCall(PetscMalloc1(size, &nz)); 1279 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1280 for (i = 0; i < size; i++) { 1281 nmax = PetscMax(nmax, nz[i]); 1282 nmin = PetscMin(nmin, nz[i]); 1283 navg += nz[i]; 1284 } 1285 PetscCall(PetscFree(nz)); 1286 navg = navg / size; 1287 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" 
PetscInt_FMT "\n", nmin, navg, nmax)); 1288 PetscFunctionReturn(PETSC_SUCCESS); 1289 } 1290 PetscCall(PetscViewerGetFormat(viewer, &format)); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscInt *inodes = NULL; 1294 1295 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1296 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1297 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1298 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1299 if (!inodes) { 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1301 info.memory)); 1302 } else { 1303 PetscCall( 1304 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1305 } 1306 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1307 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1308 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1310 PetscCall(PetscViewerFlush(viewer)); 1311 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1312 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1313 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1314 PetscFunctionReturn(PETSC_SUCCESS); 1315 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1316 PetscInt inodecount, inodelimit, *inodes; 1317 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1318 if (inodes) { 1319 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1320 } else { 1321 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1322 } 1323 PetscFunctionReturn(PETSC_SUCCESS); 1324 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 } else if (isbinary) { 1328 if (size == 1) { 1329 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1330 PetscCall(MatView(aij->A, viewer)); 1331 } else { 1332 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1333 } 1334 PetscFunctionReturn(PETSC_SUCCESS); 1335 } else if (iascii && size == 1) { 1336 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1337 PetscCall(MatView(aij->A, viewer)); 1338 PetscFunctionReturn(PETSC_SUCCESS); 1339 } else if (isdraw) { 1340 PetscDraw draw; 1341 PetscBool isnull; 1342 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1343 PetscCall(PetscDrawIsNull(draw, &isnull)); 1344 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1345 } 1346 1347 { /* assemble the entire matrix onto first processor */ 1348 Mat A = NULL, Av; 1349 IS isrow, iscol; 1350 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1352 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1353 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1354 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1355 /* The commented code uses MatCreateSubMatrices instead */ 1356 /* 1357 Mat *AA, A = NULL, Av; 1358 IS isrow,iscol; 1359 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1361 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1362 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1363 if (rank == 0) { 1364 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1365 A = AA[0]; 1366 Av = AA[0]; 1367 } 1368 PetscCall(MatDestroySubMatrices(1,&AA)); 1369 */ 1370 PetscCall(ISDestroy(&iscol)); 1371 PetscCall(ISDestroy(&isrow)); 1372 /* 1373 Everyone has to call to draw the matrix since the graphics waits are 1374 synchronized across all processors that share the PetscDraw object 1375 */ 1376 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1377 if (rank == 0) { 1378 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1379 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1380 } 1381 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1382 PetscCall(MatDestroy(&A)); 1383 } 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1388 { 1389 PetscBool iascii, isdraw, issocket, isbinary; 1390 1391 PetscFunctionBegin; 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1395 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1396 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1397 PetscFunctionReturn(PETSC_SUCCESS); 1398 } 1399 1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1401 { 1402 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1403 Vec bb1 = NULL; 1404 PetscBool hasop; 1405 1406 PetscFunctionBegin; 1407 if (flag == SOR_APPLY_UPPER) { 1408 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1409 PetscFunctionReturn(PETSC_SUCCESS); 1410 } 1411 1412 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1413 1414 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 1420 while (its--) { 1421 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1423 1424 /* update rhs: bb1 = bb - B*x */ 1425 PetscCall(VecScale(mat->lvec, -1.0)); 1426 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1427 1428 /* local sweep */ 1429 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1430 } 1431 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1432 if (flag & SOR_ZERO_INITIAL_GUESS) { 1433 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1434 its--; 1435 } 1436 while (its--) { 1437 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1439 1440 /* update rhs: bb1 = bb - B*x */ 1441 PetscCall(VecScale(mat->lvec, -1.0)); 1442 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1443 1444 /* local sweep */ 1445 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1446 } 1447 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1448 if (flag & SOR_ZERO_INITIAL_GUESS) { 1449 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1450 its--; 1451 } 1452 while (its--) { 1453 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1455 1456 /* update rhs: bb1 = bb - B*x */ 1457 PetscCall(VecScale(mat->lvec, -1.0)); 1458 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1459 1460 /* local sweep */ 1461 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1462 } 1463 } else if (flag & SOR_EISENSTAT) { 1464 Vec xx1; 1465 1466 PetscCall(VecDuplicate(bb, &xx1)); 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1468 1469 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1470 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1471 if (!mat->diag) { 1472 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1473 PetscCall(MatGetDiagonal(matin, mat->diag)); 1474 } 1475 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1476 if (hasop) { 1477 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1478 } else { 1479 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1480 } 1481 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1482 1483 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1484 1485 /* local sweep */ 1486 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1487 PetscCall(VecAXPY(xx, 1.0, xx1)); 1488 PetscCall(VecDestroy(&xx1)); 1489 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1490 1491 PetscCall(VecDestroy(&bb1)); 1492 1493 matin->factorerrortype = mat->A->factorerrortype; 1494 PetscFunctionReturn(PETSC_SUCCESS); 1495 } 1496 1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1498 { 1499 Mat aA, aB, Aperm; 1500 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1501 PetscScalar *aa, *ba; 1502 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1503 PetscSF rowsf, sf; 1504 IS parcolp = NULL; 1505 PetscBool done; 1506 1507 PetscFunctionBegin; 1508 PetscCall(MatGetLocalSize(A, &m, &n)); 1509 PetscCall(ISGetIndices(rowp, &rwant)); 1510 PetscCall(ISGetIndices(colp, &cwant)); 1511 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1512 1513 /* Invert row permutation to find out where my rows should go */ 1514 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1515 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1516 PetscCall(PetscSFSetFromOptions(rowsf)); 1517 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1518 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1519 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1520 1521 /* Invert column permutation to find out where my columns should go */ 1522 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1523 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1524 PetscCall(PetscSFSetFromOptions(sf)); 1525 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1526 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1527 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1528 PetscCall(PetscSFDestroy(&sf)); 1529 1530 PetscCall(ISRestoreIndices(rowp, &rwant)); 1531 PetscCall(ISRestoreIndices(colp, &cwant)); 1532 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1533 1534 /* Find out where my gcols should go */ 1535 PetscCall(MatGetSize(aB, NULL, &ng)); 1536 PetscCall(PetscMalloc1(ng, &gcdest)); 1537 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1538 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1539 PetscCall(PetscSFSetFromOptions(sf)); 1540 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1541 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1542 PetscCall(PetscSFDestroy(&sf)); 1543 1544 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1545 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1546 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1547 for (i = 0; i < m; i++) { 1548 PetscInt row = rdest[i]; 1549 PetscMPIInt rowner; 1550 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1551 for (j = ai[i]; j < ai[i + 1]; j++) { 1552 PetscInt col = cdest[aj[j]]; 1553 PetscMPIInt cowner; 1554 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1555 if (rowner == cowner) dnnz[i]++; 1556 else onnz[i]++; 1557 } 1558 for (j = bi[i]; j < bi[i + 1]; j++) { 1559 PetscInt col = gcdest[bj[j]]; 1560 PetscMPIInt cowner; 1561 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1562 if (rowner == cowner) dnnz[i]++; 1563 else onnz[i]++; 1564 } 1565 } 1566 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1568 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1569 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1570 PetscCall(PetscSFDestroy(&rowsf)); 1571 1572 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1573 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1574 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1575 for (i = 0; i < m; i++) { 1576 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1577 PetscInt j0, rowlen; 1578 rowlen = ai[i + 1] - ai[i]; 1579 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1580 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1581 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1582 } 1583 rowlen = bi[i + 1] - bi[i]; 1584 for (j0 = j = 0; j < rowlen; j0 = j) { 1585 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1586 
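      /* Insert the current batch of the off-diagonal part. The batching mirrors the diagonal part above;
         as an illustration (numbers are hypothetical), with m = 4 and rowlen = 10 the batches start at
         j0 = 0, 4 and 8 and hold 4, 4 and 2 column indices, so the scratch array bcols[] of length m
         always suffices. */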
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1587 } 1588 } 1589 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1591 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1592 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1593 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1594 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1595 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1596 PetscCall(PetscFree3(work, rdest, cdest)); 1597 PetscCall(PetscFree(gcdest)); 1598 if (parcolp) PetscCall(ISDestroy(&colp)); 1599 *B = Aperm; 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1604 { 1605 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1606 1607 PetscFunctionBegin; 1608 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1609 if (ghosts) *ghosts = aij->garray; 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1614 { 1615 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1616 Mat A = mat->A, B = mat->B; 1617 PetscLogDouble isend[5], irecv[5]; 1618 1619 PetscFunctionBegin; 1620 info->block_size = 1.0; 1621 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1622 1623 isend[0] = info->nz_used; 1624 isend[1] = info->nz_allocated; 1625 isend[2] = info->nz_unneeded; 1626 isend[3] = info->memory; 1627 isend[4] = info->mallocs; 1628 1629 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1630 1631 isend[0] += info->nz_used; 1632 isend[1] += info->nz_allocated; 1633 isend[2] += info->nz_unneeded; 1634 isend[3] += info->memory; 1635 isend[4] += info->mallocs; 1636 if (flag == MAT_LOCAL) { 1637 info->nz_used = isend[0]; 1638 info->nz_allocated = isend[1]; 1639 info->nz_unneeded = isend[2]; 1640 info->memory = isend[3]; 1641 info->mallocs = isend[4]; 1642 } else if (flag == MAT_GLOBAL_MAX) { 1643 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1644 1645 info->nz_used = irecv[0]; 1646 info->nz_allocated = irecv[1]; 1647 info->nz_unneeded = irecv[2]; 1648 info->memory = irecv[3]; 1649 info->mallocs = irecv[4]; 1650 } else if (flag == MAT_GLOBAL_SUM) { 1651 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1652 1653 info->nz_used = irecv[0]; 1654 info->nz_allocated = irecv[1]; 1655 info->nz_unneeded = irecv[2]; 1656 info->memory = irecv[3]; 1657 info->mallocs = irecv[4]; 1658 } 1659 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1660 info->fill_ratio_needed = 0; 1661 info->factor_mallocs = 0; 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1666 { 1667 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1668 1669 PetscFunctionBegin; 1670 switch (op) { 1671 case MAT_NEW_NONZERO_LOCATIONS: 1672 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1673 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1674 case MAT_KEEP_NONZERO_PATTERN: 1675 case MAT_NEW_NONZERO_LOCATION_ERR: 1676 case MAT_USE_INODES: 1677 case MAT_IGNORE_ZERO_ENTRIES: 1678 case MAT_FORM_EXPLICIT_TRANSPOSE: 1679 MatCheckPreallocated(A, 1); 1680 PetscCall(MatSetOption(a->A, op, flg)); 1681 PetscCall(MatSetOption(a->B, op, flg)); 1682 break; 1683 case MAT_ROW_ORIENTED: 1684 MatCheckPreallocated(A, 1); 1685 a->roworiented = flg; 1686 1687 
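    /* MAT_ROW_ORIENTED controls how the logically two-dimensional values array passed to MatSetValues()
       is interpreted. A caller-side sketch (values chosen purely for illustration):

         PetscInt    rows[2] = {0, 1}, cols[2] = {0, 1};
         PetscScalar vals[4] = {1, 2, 3, 4};
         PetscCall(MatSetValues(A, 2, rows, 2, cols, vals, INSERT_VALUES));

       With the default PETSC_TRUE the entries are read row by row, giving A(0,0)=1, A(0,1)=2, A(1,0)=3
       and A(1,1)=4; with PETSC_FALSE they are read column by column. The flag is also forwarded to both
       sequential blocks below. */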
PetscCall(MatSetOption(a->A, op, flg)); 1688 PetscCall(MatSetOption(a->B, op, flg)); 1689 break; 1690 case MAT_IGNORE_OFF_PROC_ENTRIES: 1691 a->donotstash = flg; 1692 break; 1693 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1694 case MAT_SPD: 1695 case MAT_SYMMETRIC: 1696 case MAT_STRUCTURALLY_SYMMETRIC: 1697 case MAT_HERMITIAN: 1698 case MAT_SYMMETRY_ETERNAL: 1699 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1700 case MAT_SPD_ETERNAL: 1701 /* if the diagonal matrix is square it inherits some of the properties above */ 1702 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1703 break; 1704 case MAT_SUBMAT_SINGLEIS: 1705 A->submat_singleis = flg; 1706 break; 1707 default: 1708 break; 1709 } 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1714 { 1715 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1716 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1717 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1718 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1719 PetscInt *cmap, *idx_p; 1720 1721 PetscFunctionBegin; 1722 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1723 mat->getrowactive = PETSC_TRUE; 1724 1725 if (!mat->rowvalues && (idx || v)) { 1726 /* 1727 allocate enough space to hold information from the longest row. 1728 */ 1729 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1730 PetscInt max = 1, tmp; 1731 for (i = 0; i < matin->rmap->n; i++) { 1732 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1733 if (max < tmp) max = tmp; 1734 } 1735 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1736 } 1737 1738 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1739 lrow = row - rstart; 1740 1741 pvA = &vworkA; 1742 pcA = &cworkA; 1743 pvB = &vworkB; 1744 pcB = &cworkB; 1745 if (!v) { 1746 pvA = NULL; 1747 pvB = NULL; 1748 } 1749 if (!idx) { 1750 pcA = NULL; 1751 if (!v) pcB = NULL; 1752 } 1753 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1754 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1755 nztot = nzA + nzB; 1756 1757 cmap = mat->garray; 1758 if (v || idx) { 1759 if (nztot) { 1760 /* Sort by increasing column numbers, assuming A and B already sorted */ 1761 PetscInt imark = -1; 1762 if (v) { 1763 *v = v_p = mat->rowvalues; 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1766 else break; 1767 } 1768 imark = i; 1769 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1770 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1771 } 1772 if (idx) { 1773 *idx = idx_p = mat->rowindices; 1774 if (imark > -1) { 1775 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1776 } else { 1777 for (i = 0; i < nzB; i++) { 1778 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1779 else break; 1780 } 1781 imark = i; 1782 } 1783 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1784 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1785 } 1786 } else { 1787 if (idx) *idx = NULL; 1788 if (v) *v = NULL; 1789 } 1790 } 1791 *nz = nztot; 1792 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1793 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1794 
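  /* Typical caller-side pattern this routine supports, sketched with illustrative variable names:

       PetscInt           rstart, rend, row, ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;

       PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
       for (row = rstart; row < rend; row++) {
         PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
         ... use cols[] and vals[], already merged and sorted by global column ...
         PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
       }

     Only one row may be held at a time, which is why the single pair of work arrays above is reused. */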
PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 1801 PetscFunctionBegin; 1802 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1803 aij->getrowactive = PETSC_FALSE; 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1808 { 1809 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1810 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1811 PetscInt i, j, cstart = mat->cmap->rstart; 1812 PetscReal sum = 0.0; 1813 const MatScalar *v, *amata, *bmata; 1814 1815 PetscFunctionBegin; 1816 if (aij->size == 1) { 1817 PetscCall(MatNorm(aij->A, type, norm)); 1818 } else { 1819 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1820 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1821 if (type == NORM_FROBENIUS) { 1822 v = amata; 1823 for (i = 0; i < amat->nz; i++) { 1824 sum += PetscRealPart(PetscConj(*v) * (*v)); 1825 v++; 1826 } 1827 v = bmata; 1828 for (i = 0; i < bmat->nz; i++) { 1829 sum += PetscRealPart(PetscConj(*v) * (*v)); 1830 v++; 1831 } 1832 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1833 *norm = PetscSqrtReal(*norm); 1834 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1835 } else if (type == NORM_1) { /* max column norm */ 1836 PetscReal *tmp; 1837 PetscInt *jj, *garray = aij->garray; 1838 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1839 *norm = 0.0; 1840 v = amata; 1841 jj = amat->j; 1842 for (j = 0; j < amat->nz; j++) { 1843 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1844 v++; 1845 } 1846 v = bmata; 1847 jj = bmat->j; 1848 for (j = 0; j < bmat->nz; j++) { 1849 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1850 v++; 1851 } 1852 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1853 for (j = 0; j < mat->cmap->N; j++) { 1854 if (tmp[j] > *norm) *norm = tmp[j]; 1855 } 1856 PetscCall(PetscFree(tmp)); 1857 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1858 } else if (type == NORM_INFINITY) { /* max row norm */ 1859 PetscReal ntemp = 0.0; 1860 for (j = 0; j < aij->A->rmap->n; j++) { 1861 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1862 sum = 0.0; 1863 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1864 sum += PetscAbsScalar(*v); 1865 v++; 1866 } 1867 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1868 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1869 sum += PetscAbsScalar(*v); 1870 v++; 1871 } 1872 if (sum > ntemp) ntemp = sum; 1873 } 1874 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1875 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1876 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1877 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1878 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1879 } 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1884 { 1885 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1886 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1887 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, 
*cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1888 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1889 Mat B, A_diag, *B_diag; 1890 const MatScalar *pbv, *bv; 1891 1892 PetscFunctionBegin; 1893 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1894 ma = A->rmap->n; 1895 na = A->cmap->n; 1896 mb = a->B->rmap->n; 1897 nb = a->B->cmap->n; 1898 ai = Aloc->i; 1899 aj = Aloc->j; 1900 bi = Bloc->i; 1901 bj = Bloc->j; 1902 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1903 PetscInt *d_nnz, *g_nnz, *o_nnz; 1904 PetscSFNode *oloc; 1905 PETSC_UNUSED PetscSF sf; 1906 1907 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1908 /* compute d_nnz for preallocation */ 1909 PetscCall(PetscArrayzero(d_nnz, na)); 1910 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1911 /* compute local off-diagonal contributions */ 1912 PetscCall(PetscArrayzero(g_nnz, nb)); 1913 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1914 /* map those to global */ 1915 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1916 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1917 PetscCall(PetscSFSetFromOptions(sf)); 1918 PetscCall(PetscArrayzero(o_nnz, na)); 1919 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1920 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1921 PetscCall(PetscSFDestroy(&sf)); 1922 1923 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1924 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1925 PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs)); 1926 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1927 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1928 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1929 } else { 1930 B = *matout; 1931 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1932 } 1933 1934 b = (Mat_MPIAIJ *)B->data; 1935 A_diag = a->A; 1936 B_diag = &b->A; 1937 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1938 A_diag_ncol = A_diag->cmap->N; 1939 B_diag_ilen = sub_B_diag->ilen; 1940 B_diag_i = sub_B_diag->i; 1941 1942 /* Set ilen for diagonal of B */ 1943 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1944 1945 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1946 very quickly (=without using MatSetValues), because all writes are local. 
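     The MatTransposeSetPrecursor() call just below is what makes the MAT_REUSE_MATRIX form of
     MatTranspose() legal on *B_diag even though *B_diag was not produced by an earlier
     MatTranspose() call.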
*/ 1947 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1948 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1949 1950 /* copy over the B part */ 1951 PetscCall(PetscMalloc1(bi[mb], &cols)); 1952 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1953 pbv = bv; 1954 row = A->rmap->rstart; 1955 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1956 cols_tmp = cols; 1957 for (i = 0; i < mb; i++) { 1958 ncol = bi[i + 1] - bi[i]; 1959 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1960 row++; 1961 if (pbv) pbv += ncol; 1962 if (cols_tmp) cols_tmp += ncol; 1963 } 1964 PetscCall(PetscFree(cols)); 1965 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1966 1967 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1968 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1969 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1970 *matout = B; 1971 } else { 1972 PetscCall(MatHeaderMerge(A, &B)); 1973 } 1974 PetscFunctionReturn(PETSC_SUCCESS); 1975 } 1976 1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1978 { 1979 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1980 Mat a = aij->A, b = aij->B; 1981 PetscInt s1, s2, s3; 1982 1983 PetscFunctionBegin; 1984 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1985 if (rr) { 1986 PetscCall(VecGetLocalSize(rr, &s1)); 1987 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1988 /* Overlap communication with computation. */ 1989 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1990 } 1991 if (ll) { 1992 PetscCall(VecGetLocalSize(ll, &s1)); 1993 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1994 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1995 } 1996 /* scale the diagonal block */ 1997 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1998 1999 if (rr) { 2000 /* Do a scatter end and then right scale the off-diagonal block */ 2001 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2002 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2003 } 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2010 2011 PetscFunctionBegin; 2012 PetscCall(MatSetUnfactored(a->A)); 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2017 { 2018 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2019 Mat a, b, c, d; 2020 PetscBool flg; 2021 2022 PetscFunctionBegin; 2023 a = matA->A; 2024 b = matA->B; 2025 c = matB->A; 2026 d = matB->B; 2027 2028 PetscCall(MatEqual(a, c, &flg)); 2029 if (flg) PetscCall(MatEqual(b, d, &flg)); 2030 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2035 { 2036 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2037 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2038 2039 PetscFunctionBegin; 2040 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2041 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2042 /* because of the column compression in the off-processor part of the matrix a->B, 2043 the number of columns in a->B and b->B may be different, hence we cannot call 2044 the MatCopy() directly on the two parts. If need be, we can provide a more 2045 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2046 then copying the submatrices */ 2047 PetscCall(MatCopy_Basic(A, B, str)); 2048 } else { 2049 PetscCall(MatCopy(a->A, b->A, str)); 2050 PetscCall(MatCopy(a->B, b->B, str)); 2051 } 2052 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2053 PetscFunctionReturn(PETSC_SUCCESS); 2054 } 2055 2056 /* 2057 Computes the number of nonzeros per row needed for preallocation when X and Y 2058 have different nonzero structure. 2059 */ 2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2061 { 2062 PetscInt i, j, k, nzx, nzy; 2063 2064 PetscFunctionBegin; 2065 /* Set the number of nonzeros in the new matrix */ 2066 for (i = 0; i < m; i++) { 2067 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2068 nzx = xi[i + 1] - xi[i]; 2069 nzy = yi[i + 1] - yi[i]; 2070 nnz[i] = 0; 2071 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2072 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2073 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2074 nnz[i]++; 2075 } 2076 for (; k < nzy; k++) nnz[i]++; 2077 } 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2083 { 2084 PetscInt m = Y->rmap->N; 2085 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2086 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2087 2088 PetscFunctionBegin; 2089 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2094 { 2095 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2096 2097 PetscFunctionBegin; 2098 if (str == SAME_NONZERO_PATTERN) { 2099 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2100 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2101 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2102 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2103 } else { 2104 Mat B; 2105 PetscInt *nnz_d, *nnz_o; 2106 2107 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2108 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2109 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2110 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2111 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2112 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2113 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2114 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2115 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2116 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2117 PetscCall(MatHeaderMerge(Y, &B)); 2118 PetscCall(PetscFree(nnz_d)); 
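    /* At this point Y already carries the freshly preallocated sum (MatHeaderMerge() above swapped it
       in place); only the temporary preallocation counts remain to be released. A caller-side sketch
       of the path exercised here (the scalar is illustrative):

         PetscCall(MatAXPY(Y, 2.0, X, DIFFERENT_NONZERO_PATTERN));
    */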
2119 PetscCall(PetscFree(nnz_o)); 2120 } 2121 PetscFunctionReturn(PETSC_SUCCESS); 2122 } 2123 2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2125 2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2127 { 2128 PetscFunctionBegin; 2129 if (PetscDefined(USE_COMPLEX)) { 2130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2131 2132 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2133 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2134 } 2135 PetscFunctionReturn(PETSC_SUCCESS); 2136 } 2137 2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2139 { 2140 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2141 2142 PetscFunctionBegin; 2143 PetscCall(MatRealPart(a->A)); 2144 PetscCall(MatRealPart(a->B)); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2149 { 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2151 2152 PetscFunctionBegin; 2153 PetscCall(MatImaginaryPart(a->A)); 2154 PetscCall(MatImaginaryPart(a->B)); 2155 PetscFunctionReturn(PETSC_SUCCESS); 2156 } 2157 2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2159 { 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2161 PetscInt i, *idxb = NULL, m = A->rmap->n; 2162 PetscScalar *vv; 2163 Vec vB, vA; 2164 const PetscScalar *va, *vb; 2165 2166 PetscFunctionBegin; 2167 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2168 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2169 2170 PetscCall(VecGetArrayRead(vA, &va)); 2171 if (idx) { 2172 for (i = 0; i < m; i++) { 2173 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2174 } 2175 } 2176 2177 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2178 PetscCall(PetscMalloc1(m, &idxb)); 2179 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2180 2181 PetscCall(VecGetArrayWrite(v, &vv)); 2182 PetscCall(VecGetArrayRead(vB, &vb)); 2183 for (i = 0; i < m; i++) { 2184 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2185 vv[i] = vb[i]; 2186 if (idx) idx[i] = a->garray[idxb[i]]; 2187 } else { 2188 vv[i] = va[i]; 2189 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2190 } 2191 } 2192 PetscCall(VecRestoreArrayWrite(v, &vv)); 2193 PetscCall(VecRestoreArrayRead(vA, &va)); 2194 PetscCall(VecRestoreArrayRead(vB, &vb)); 2195 PetscCall(PetscFree(idxb)); 2196 PetscCall(VecDestroy(&vA)); 2197 PetscCall(VecDestroy(&vB)); 2198 PetscFunctionReturn(PETSC_SUCCESS); 2199 } 2200 2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2204 Vec vB, vA; 2205 2206 PetscFunctionBegin; 2207 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2208 PetscCall(MatGetRowSumAbs(a->A, vA)); 2209 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2210 PetscCall(MatGetRowSumAbs(a->B, vB)); 2211 PetscCall(VecAXPY(vA, 1.0, vB)); 2212 PetscCall(VecDestroy(&vB)); 2213 PetscCall(VecCopy(vA, v)); 2214 PetscCall(VecDestroy(&vA)); 2215 PetscFunctionReturn(PETSC_SUCCESS); 2216 } 2217 2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2219 { 2220 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2221 PetscInt m = A->rmap->n, n = A->cmap->n; 2222 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2223 PetscInt *cmap = mat->garray; 2224 PetscInt *diagIdx, *offdiagIdx; 2225 Vec diagV, offdiagV; 2226 PetscScalar *a, *diagA, *offdiagA; 2227 const PetscScalar *ba, *bav; 2228 PetscInt r, j, col, ncols, *bi, *bj; 2229 Mat B = mat->B; 2230 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2231 2232 PetscFunctionBegin; 2233 /* When a process holds entire A and other 
processes have no entry */ 2234 if (A->cmap->N == n) { 2235 PetscCall(VecGetArrayWrite(v, &diagA)); 2236 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2237 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2238 PetscCall(VecDestroy(&diagV)); 2239 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } else if (n == 0) { 2242 if (m) { 2243 PetscCall(VecGetArrayWrite(v, &a)); 2244 for (r = 0; r < m; r++) { 2245 a[r] = 0.0; 2246 if (idx) idx[r] = -1; 2247 } 2248 PetscCall(VecRestoreArrayWrite(v, &a)); 2249 } 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } 2252 2253 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2255 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2256 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2257 2258 /* Get offdiagIdx[] for implicit 0.0 */ 2259 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2260 ba = bav; 2261 bi = b->i; 2262 bj = b->j; 2263 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2264 for (r = 0; r < m; r++) { 2265 ncols = bi[r + 1] - bi[r]; 2266 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2267 offdiagA[r] = *ba; 2268 offdiagIdx[r] = cmap[0]; 2269 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2270 offdiagA[r] = 0.0; 2271 2272 /* Find first hole in the cmap */ 2273 for (j = 0; j < ncols; j++) { 2274 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2275 if (col > j && j < cstart) { 2276 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2277 break; 2278 } else if (col > j + n && j >= cstart) { 2279 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2280 break; 2281 } 2282 } 2283 if (j == ncols && ncols < A->cmap->N - n) { 2284 /* a hole is outside compressed Bcols */ 2285 if (ncols == 0) { 2286 if (cstart) { 2287 offdiagIdx[r] = 0; 2288 } else offdiagIdx[r] = cend; 2289 } else { /* ncols > 0 */ 2290 offdiagIdx[r] = cmap[ncols - 1] + 1; 2291 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2292 } 2293 } 2294 } 2295 2296 for (j = 0; j < ncols; j++) { 2297 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2298 offdiagA[r] = *ba; 2299 offdiagIdx[r] = cmap[*bj]; 2300 } 2301 ba++; 2302 bj++; 2303 } 2304 } 2305 2306 PetscCall(VecGetArrayWrite(v, &a)); 2307 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2308 for (r = 0; r < m; ++r) { 2309 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2310 a[r] = diagA[r]; 2311 if (idx) idx[r] = cstart + diagIdx[r]; 2312 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2313 a[r] = diagA[r]; 2314 if (idx) { 2315 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2316 idx[r] = cstart + diagIdx[r]; 2317 } else idx[r] = offdiagIdx[r]; 2318 } 2319 } else { 2320 a[r] = offdiagA[r]; 2321 if (idx) idx[r] = offdiagIdx[r]; 2322 } 2323 } 2324 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2325 PetscCall(VecRestoreArrayWrite(v, &a)); 2326 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2327 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2328 PetscCall(VecDestroy(&diagV)); 2329 PetscCall(VecDestroy(&offdiagV)); 2330 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2331 PetscFunctionReturn(PETSC_SUCCESS); 2332 } 2333 2334 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2335 { 2336 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2337 PetscInt m = A->rmap->n, n = A->cmap->n; 2338 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 
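  /* Same overall scheme as MatGetRowMinAbs_MPIAIJ() above and MatGetRowMax_MPIAIJ() below: compute the
     row minima of the diagonal block A and of the off-diagonal block B separately, treat columns that
     are missing from a compressed B row as implicit 0.0 entries, and then merge the two results row by
     row, preferring the smaller global column index when the values tie. */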
2339 PetscInt *cmap = mat->garray; 2340 PetscInt *diagIdx, *offdiagIdx; 2341 Vec diagV, offdiagV; 2342 PetscScalar *a, *diagA, *offdiagA; 2343 const PetscScalar *ba, *bav; 2344 PetscInt r, j, col, ncols, *bi, *bj; 2345 Mat B = mat->B; 2346 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2347 2348 PetscFunctionBegin; 2349 /* When a process holds entire A and other processes have no entry */ 2350 if (A->cmap->N == n) { 2351 PetscCall(VecGetArrayWrite(v, &diagA)); 2352 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2353 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2354 PetscCall(VecDestroy(&diagV)); 2355 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2356 PetscFunctionReturn(PETSC_SUCCESS); 2357 } else if (n == 0) { 2358 if (m) { 2359 PetscCall(VecGetArrayWrite(v, &a)); 2360 for (r = 0; r < m; r++) { 2361 a[r] = PETSC_MAX_REAL; 2362 if (idx) idx[r] = -1; 2363 } 2364 PetscCall(VecRestoreArrayWrite(v, &a)); 2365 } 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } 2368 2369 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2370 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2371 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2372 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2373 2374 /* Get offdiagIdx[] for implicit 0.0 */ 2375 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2376 ba = bav; 2377 bi = b->i; 2378 bj = b->j; 2379 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2380 for (r = 0; r < m; r++) { 2381 ncols = bi[r + 1] - bi[r]; 2382 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2383 offdiagA[r] = *ba; 2384 offdiagIdx[r] = cmap[0]; 2385 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2386 offdiagA[r] = 0.0; 2387 2388 /* Find first hole in the cmap */ 2389 for (j = 0; j < ncols; j++) { 2390 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2391 if (col > j && j < cstart) { 2392 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2393 break; 2394 } else if (col > j + n && j >= cstart) { 2395 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2396 break; 2397 } 2398 } 2399 if (j == ncols && ncols < A->cmap->N - n) { 2400 /* a hole is outside compressed Bcols */ 2401 if (ncols == 0) { 2402 if (cstart) { 2403 offdiagIdx[r] = 0; 2404 } else offdiagIdx[r] = cend; 2405 } else { /* ncols > 0 */ 2406 offdiagIdx[r] = cmap[ncols - 1] + 1; 2407 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2408 } 2409 } 2410 } 2411 2412 for (j = 0; j < ncols; j++) { 2413 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2414 offdiagA[r] = *ba; 2415 offdiagIdx[r] = cmap[*bj]; 2416 } 2417 ba++; 2418 bj++; 2419 } 2420 } 2421 2422 PetscCall(VecGetArrayWrite(v, &a)); 2423 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2424 for (r = 0; r < m; ++r) { 2425 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2426 a[r] = diagA[r]; 2427 if (idx) idx[r] = cstart + diagIdx[r]; 2428 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2429 a[r] = diagA[r]; 2430 if (idx) { 2431 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2432 idx[r] = cstart + diagIdx[r]; 2433 } else idx[r] = offdiagIdx[r]; 2434 } 2435 } else { 2436 a[r] = offdiagA[r]; 2437 if (idx) idx[r] = offdiagIdx[r]; 2438 } 2439 } 2440 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2441 PetscCall(VecRestoreArrayWrite(v, &a)); 2442 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2443 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2444 PetscCall(VecDestroy(&diagV)); 2445 
PetscCall(VecDestroy(&offdiagV)); 2446 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2447 PetscFunctionReturn(PETSC_SUCCESS); 2448 } 2449 2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2451 { 2452 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2453 PetscInt m = A->rmap->n, n = A->cmap->n; 2454 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2455 PetscInt *cmap = mat->garray; 2456 PetscInt *diagIdx, *offdiagIdx; 2457 Vec diagV, offdiagV; 2458 PetscScalar *a, *diagA, *offdiagA; 2459 const PetscScalar *ba, *bav; 2460 PetscInt r, j, col, ncols, *bi, *bj; 2461 Mat B = mat->B; 2462 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2463 2464 PetscFunctionBegin; 2465 /* When a process holds entire A and other processes have no entry */ 2466 if (A->cmap->N == n) { 2467 PetscCall(VecGetArrayWrite(v, &diagA)); 2468 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2469 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2470 PetscCall(VecDestroy(&diagV)); 2471 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2472 PetscFunctionReturn(PETSC_SUCCESS); 2473 } else if (n == 0) { 2474 if (m) { 2475 PetscCall(VecGetArrayWrite(v, &a)); 2476 for (r = 0; r < m; r++) { 2477 a[r] = PETSC_MIN_REAL; 2478 if (idx) idx[r] = -1; 2479 } 2480 PetscCall(VecRestoreArrayWrite(v, &a)); 2481 } 2482 PetscFunctionReturn(PETSC_SUCCESS); 2483 } 2484 2485 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2486 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2487 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2488 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2489 2490 /* Get offdiagIdx[] for implicit 0.0 */ 2491 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2492 ba = bav; 2493 bi = b->i; 2494 bj = b->j; 2495 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2496 for (r = 0; r < m; r++) { 2497 ncols = bi[r + 1] - bi[r]; 2498 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2499 offdiagA[r] = *ba; 2500 offdiagIdx[r] = cmap[0]; 2501 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2502 offdiagA[r] = 0.0; 2503 2504 /* Find first hole in the cmap */ 2505 for (j = 0; j < ncols; j++) { 2506 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2507 if (col > j && j < cstart) { 2508 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2509 break; 2510 } else if (col > j + n && j >= cstart) { 2511 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2512 break; 2513 } 2514 } 2515 if (j == ncols && ncols < A->cmap->N - n) { 2516 /* a hole is outside compressed Bcols */ 2517 if (ncols == 0) { 2518 if (cstart) { 2519 offdiagIdx[r] = 0; 2520 } else offdiagIdx[r] = cend; 2521 } else { /* ncols > 0 */ 2522 offdiagIdx[r] = cmap[ncols - 1] + 1; 2523 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2524 } 2525 } 2526 } 2527 2528 for (j = 0; j < ncols; j++) { 2529 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2530 offdiagA[r] = *ba; 2531 offdiagIdx[r] = cmap[*bj]; 2532 } 2533 ba++; 2534 bj++; 2535 } 2536 } 2537 2538 PetscCall(VecGetArrayWrite(v, &a)); 2539 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2540 for (r = 0; r < m; ++r) { 2541 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2542 a[r] = diagA[r]; 2543 if (idx) idx[r] = cstart + diagIdx[r]; 2544 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2545 a[r] = diagA[r]; 2546 if (idx) { 2547 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2548 idx[r] = cstart + diagIdx[r]; 2549 } else idx[r] = offdiagIdx[r]; 2550 } 2551 } 
else { 2552 a[r] = offdiagA[r]; 2553 if (idx) idx[r] = offdiagIdx[r]; 2554 } 2555 } 2556 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2557 PetscCall(VecRestoreArrayWrite(v, &a)); 2558 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2559 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2560 PetscCall(VecDestroy(&diagV)); 2561 PetscCall(VecDestroy(&offdiagV)); 2562 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2563 PetscFunctionReturn(PETSC_SUCCESS); 2564 } 2565 2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2567 { 2568 Mat *dummy; 2569 2570 PetscFunctionBegin; 2571 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2572 *newmat = *dummy; 2573 PetscCall(PetscFree(dummy)); 2574 PetscFunctionReturn(PETSC_SUCCESS); 2575 } 2576 2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2578 { 2579 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2580 2581 PetscFunctionBegin; 2582 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2583 A->factorerrortype = a->A->factorerrortype; 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2588 { 2589 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2590 2591 PetscFunctionBegin; 2592 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2593 PetscCall(MatSetRandom(aij->A, rctx)); 2594 if (x->assembled) { 2595 PetscCall(MatSetRandom(aij->B, rctx)); 2596 } else { 2597 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2598 } 2599 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2600 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2601 PetscFunctionReturn(PETSC_SUCCESS); 2602 } 2603 2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2605 { 2606 PetscFunctionBegin; 2607 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2608 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2609 PetscFunctionReturn(PETSC_SUCCESS); 2610 } 2611 2612 /*@ 2613 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2614 2615 Not Collective 2616 2617 Input Parameter: 2618 . A - the matrix 2619 2620 Output Parameter: 2621 . 
nz - the number of nonzeros 2622 2623 Level: advanced 2624 2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2626 @*/ 2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2628 { 2629 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2630 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2631 PetscBool isaij; 2632 2633 PetscFunctionBegin; 2634 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2635 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2636 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2637 PetscFunctionReturn(PETSC_SUCCESS); 2638 } 2639 2640 /*@ 2641 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2642 2643 Collective 2644 2645 Input Parameters: 2646 + A - the matrix 2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2648 2649 Level: advanced 2650 2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2652 @*/ 2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2654 { 2655 PetscFunctionBegin; 2656 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2657 PetscFunctionReturn(PETSC_SUCCESS); 2658 } 2659 2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject) 2661 { 2662 PetscBool sc = PETSC_FALSE, flg; 2663 2664 PetscFunctionBegin; 2665 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2666 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2667 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2668 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2669 PetscOptionsHeadEnd(); 2670 PetscFunctionReturn(PETSC_SUCCESS); 2671 } 2672 2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2674 { 2675 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2676 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2677 2678 PetscFunctionBegin; 2679 if (!Y->preallocated) { 2680 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2681 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
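       One reserved entry per local row (the MatSeqAIJSetPreallocation() call just below) is enough of
       an estimate for the diagonal shift, and saving/restoring nonew keeps the user's new-nonzero
       policy unchanged.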
*/ 2682 PetscInt nonew = aij->nonew; 2683 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2684 aij->nonew = nonew; 2685 } 2686 PetscCall(MatShift_Basic(Y, a)); 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2691 { 2692 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2693 2694 PetscFunctionBegin; 2695 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2696 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2697 if (d) { 2698 PetscInt rstart; 2699 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2700 *d += rstart; 2701 } 2702 PetscFunctionReturn(PETSC_SUCCESS); 2703 } 2704 2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2708 2709 PetscFunctionBegin; 2710 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2715 { 2716 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2717 2718 PetscFunctionBegin; 2719 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2720 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2725 MatGetRow_MPIAIJ, 2726 MatRestoreRow_MPIAIJ, 2727 MatMult_MPIAIJ, 2728 /* 4*/ MatMultAdd_MPIAIJ, 2729 MatMultTranspose_MPIAIJ, 2730 MatMultTransposeAdd_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*10*/ NULL, 2735 NULL, 2736 NULL, 2737 MatSOR_MPIAIJ, 2738 MatTranspose_MPIAIJ, 2739 /*15*/ MatGetInfo_MPIAIJ, 2740 MatEqual_MPIAIJ, 2741 MatGetDiagonal_MPIAIJ, 2742 MatDiagonalScale_MPIAIJ, 2743 MatNorm_MPIAIJ, 2744 /*20*/ MatAssemblyBegin_MPIAIJ, 2745 MatAssemblyEnd_MPIAIJ, 2746 MatSetOption_MPIAIJ, 2747 MatZeroEntries_MPIAIJ, 2748 /*24*/ MatZeroRows_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 NULL, 2753 /*29*/ MatSetUp_MPI_Hash, 2754 NULL, 2755 NULL, 2756 MatGetDiagonalBlock_MPIAIJ, 2757 NULL, 2758 /*34*/ MatDuplicate_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 /*39*/ MatAXPY_MPIAIJ, 2764 MatCreateSubMatrices_MPIAIJ, 2765 MatIncreaseOverlap_MPIAIJ, 2766 MatGetValues_MPIAIJ, 2767 MatCopy_MPIAIJ, 2768 /*44*/ MatGetRowMax_MPIAIJ, 2769 MatScale_MPIAIJ, 2770 MatShift_MPIAIJ, 2771 MatDiagonalSet_MPIAIJ, 2772 MatZeroRowsColumns_MPIAIJ, 2773 /*49*/ MatSetRandom_MPIAIJ, 2774 MatGetRowIJ_MPIAIJ, 2775 MatRestoreRowIJ_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*54*/ MatFDColoringCreate_MPIXAIJ, 2779 NULL, 2780 MatSetUnfactored_MPIAIJ, 2781 MatPermute_MPIAIJ, 2782 NULL, 2783 /*59*/ MatCreateSubMatrix_MPIAIJ, 2784 MatDestroy_MPIAIJ, 2785 MatView_MPIAIJ, 2786 NULL, 2787 NULL, 2788 /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2789 NULL, 2790 NULL, 2791 NULL, 2792 MatGetRowMaxAbs_MPIAIJ, 2793 /*69*/ MatGetRowMinAbs_MPIAIJ, 2794 NULL, 2795 NULL, 2796 MatFDColoringApply_AIJ, 2797 MatSetFromOptions_MPIAIJ, 2798 MatFindZeroDiagonals_MPIAIJ, 2799 /*75*/ NULL, 2800 NULL, 2801 NULL, 2802 MatLoad_MPIAIJ, 2803 NULL, 2804 /*80*/ NULL, 2805 NULL, 2806 NULL, 2807 /*83*/ NULL, 2808 NULL, 2809 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2810 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2811 NULL, 2812 NULL, 2813 /*89*/ MatBindToCPU_MPIAIJ, 2814 MatProductSetFromOptions_MPIAIJ, 2815 NULL, 2816 NULL, 2817 
MatConjugate_MPIAIJ, 2818 /*94*/ NULL, 2819 MatSetValuesRow_MPIAIJ, 2820 MatRealPart_MPIAIJ, 2821 MatImaginaryPart_MPIAIJ, 2822 NULL, 2823 /*99*/ NULL, 2824 NULL, 2825 NULL, 2826 MatGetRowMin_MPIAIJ, 2827 NULL, 2828 /*104*/ MatMissingDiagonal_MPIAIJ, 2829 MatGetSeqNonzeroStructure_MPIAIJ, 2830 NULL, 2831 MatGetGhosts_MPIAIJ, 2832 NULL, 2833 /*109*/ NULL, 2834 MatMultDiagonalBlock_MPIAIJ, 2835 NULL, 2836 NULL, 2837 NULL, 2838 /*114*/ MatGetMultiProcBlock_MPIAIJ, 2839 MatFindNonzeroRows_MPIAIJ, 2840 MatGetColumnReductions_MPIAIJ, 2841 MatInvertBlockDiagonal_MPIAIJ, 2842 MatInvertVariableBlockDiagonal_MPIAIJ, 2843 /*119*/ MatCreateSubMatricesMPI_MPIAIJ, 2844 NULL, 2845 NULL, 2846 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2847 NULL, 2848 /*124*/ NULL, 2849 NULL, 2850 NULL, 2851 MatSetBlockSizes_MPIAIJ, 2852 NULL, 2853 /*129*/ MatFDColoringSetUp_MPIXAIJ, 2854 MatFindOffBlockDiagonalEntries_MPIAIJ, 2855 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2856 NULL, 2857 NULL, 2858 /*134*/ NULL, 2859 MatCreateGraph_Simple_AIJ, 2860 NULL, 2861 MatEliminateZeros_MPIAIJ, 2862 MatGetRowSumAbs_MPIAIJ, 2863 /*139*/ NULL, 2864 NULL, 2865 NULL, 2866 MatCopyHashToXAIJ_MPI_Hash, 2867 MatGetCurrentMemType_MPIAIJ}; 2868 2869 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2870 { 2871 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2872 2873 PetscFunctionBegin; 2874 PetscCall(MatStoreValues(aij->A)); 2875 PetscCall(MatStoreValues(aij->B)); 2876 PetscFunctionReturn(PETSC_SUCCESS); 2877 } 2878 2879 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2880 { 2881 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2882 2883 PetscFunctionBegin; 2884 PetscCall(MatRetrieveValues(aij->A)); 2885 PetscCall(MatRetrieveValues(aij->B)); 2886 PetscFunctionReturn(PETSC_SUCCESS); 2887 } 2888 2889 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2890 { 2891 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2892 PetscMPIInt size; 2893 2894 PetscFunctionBegin; 2895 if (B->hash_active) { 2896 B->ops[0] = b->cops; 2897 B->hash_active = PETSC_FALSE; 2898 } 2899 PetscCall(PetscLayoutSetUp(B->rmap)); 2900 PetscCall(PetscLayoutSetUp(B->cmap)); 2901 2902 #if defined(PETSC_USE_CTABLE) 2903 PetscCall(PetscHMapIDestroy(&b->colmap)); 2904 #else 2905 PetscCall(PetscFree(b->colmap)); 2906 #endif 2907 PetscCall(PetscFree(b->garray)); 2908 PetscCall(VecDestroy(&b->lvec)); 2909 PetscCall(VecScatterDestroy(&b->Mvctx)); 2910 2911 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2912 2913 MatSeqXAIJGetOptions_Private(b->B); 2914 PetscCall(MatDestroy(&b->B)); 2915 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2916 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2917 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2918 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2919 MatSeqXAIJRestoreOptions_Private(b->B); 2920 2921 MatSeqXAIJGetOptions_Private(b->A); 2922 PetscCall(MatDestroy(&b->A)); 2923 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2924 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2925 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2926 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2927 MatSeqXAIJRestoreOptions_Private(b->A); 2928 2929 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2930 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2931 B->preallocated = PETSC_TRUE; 2932 B->was_assembled = PETSC_FALSE; 2933 B->assembled = PETSC_FALSE; 2934 PetscFunctionReturn(PETSC_SUCCESS); 2935 } 2936 2937 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2938 { 2939 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2940 PetscBool ondiagreset, offdiagreset, memoryreset; 2941 2942 PetscFunctionBegin; 2943 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2944 PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()"); 2945 if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS); 2946 2947 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset)); 2948 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset)); 2949 memoryreset = (PetscBool)(ondiagreset || offdiagreset); 2950 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B))); 2951 if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS); 2952 2953 PetscCall(PetscLayoutSetUp(B->rmap)); 2954 PetscCall(PetscLayoutSetUp(B->cmap)); 2955 PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled"); 2956 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2957 PetscCall(VecScatterDestroy(&b->Mvctx)); 2958 2959 B->preallocated = PETSC_TRUE; 2960 B->was_assembled = PETSC_FALSE; 2961 B->assembled = PETSC_FALSE; 2962 /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */ 2963 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2964 PetscFunctionReturn(PETSC_SUCCESS); 2965 } 2966 2967 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2968 { 2969 Mat mat; 2970 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2971 2972 PetscFunctionBegin; 2973 *newmat = NULL; 2974 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2975 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2976 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2977 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2978 a = (Mat_MPIAIJ *)mat->data; 2979 2980 mat->factortype = matin->factortype; 2981 mat->assembled = matin->assembled; 2982 mat->insertmode = NOT_SET_VALUES; 2983 2984 a->size = oldmat->size; 2985 a->rank = oldmat->rank; 2986 a->donotstash = oldmat->donotstash; 2987 a->roworiented = oldmat->roworiented; 2988 a->rowindices = NULL; 2989 a->rowvalues = NULL; 2990 a->getrowactive = PETSC_FALSE; 2991 2992 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2993 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2994 if (matin->hash_active) { 2995 PetscCall(MatSetUp(mat)); 2996 } else { 2997 
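    /* Already-preallocated case: replicate the parallel bookkeeping of matin, namely the colmap
       (global-to-local column map), garray (global indices of the off-diagonal columns), the ghost
       vector lvec and the scatter Mvctx (shared by reference), then duplicate the two sequential
       blocks themselves. A caller typically reaches this path via, e.g.,
       MatDuplicate(A, MAT_COPY_VALUES, &B) on an assembled MATMPIAIJ. */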
mat->preallocated = matin->preallocated; 2998 if (oldmat->colmap) { 2999 #if defined(PETSC_USE_CTABLE) 3000 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3001 #else 3002 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3003 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3004 #endif 3005 } else a->colmap = NULL; 3006 if (oldmat->garray) { 3007 PetscInt len; 3008 len = oldmat->B->cmap->n; 3009 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3010 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3011 } else a->garray = NULL; 3012 3013 /* It may happen MatDuplicate is called with a non-assembled matrix 3014 In fact, MatDuplicate only requires the matrix to be preallocated 3015 This may happen inside a DMCreateMatrix_Shell */ 3016 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3017 if (oldmat->Mvctx) { 3018 a->Mvctx = oldmat->Mvctx; 3019 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3020 } 3021 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3022 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3023 } 3024 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3025 *newmat = mat; 3026 PetscFunctionReturn(PETSC_SUCCESS); 3027 } 3028 3029 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3030 { 3031 PetscBool isbinary, ishdf5; 3032 3033 PetscFunctionBegin; 3034 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3035 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3036 /* force binary viewer to load .info file if it has not yet done so */ 3037 PetscCall(PetscViewerSetUp(viewer)); 3038 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3039 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3040 if (isbinary) { 3041 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3042 } else if (ishdf5) { 3043 #if defined(PETSC_HAVE_HDF5) 3044 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3045 #else 3046 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3047 #endif 3048 } else { 3049 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3050 } 3051 PetscFunctionReturn(PETSC_SUCCESS); 3052 } 3053 3054 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3055 { 3056 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3057 PetscInt *rowidxs, *colidxs; 3058 PetscScalar *matvals; 3059 3060 PetscFunctionBegin; 3061 PetscCall(PetscViewerSetUp(viewer)); 3062 3063 /* read in matrix header */ 3064 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3065 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3066 M = header[1]; 3067 N = header[2]; 3068 nz = header[3]; 3069 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3070 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3071 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3072 3073 /* set block sizes from the viewer's .info file */ 3074 
PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3075 /* set global sizes if not set already */ 3076 if (mat->rmap->N < 0) mat->rmap->N = M; 3077 if (mat->cmap->N < 0) mat->cmap->N = N; 3078 PetscCall(PetscLayoutSetUp(mat->rmap)); 3079 PetscCall(PetscLayoutSetUp(mat->cmap)); 3080 3081 /* check if the matrix sizes are correct */ 3082 PetscCall(MatGetSize(mat, &rows, &cols)); 3083 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3084 3085 /* read in row lengths and build row indices */ 3086 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3087 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3088 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3089 rowidxs[0] = 0; 3090 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3091 if (nz != PETSC_INT_MAX) { 3092 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3093 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3094 } 3095 3096 /* read in column indices and matrix values */ 3097 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3098 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3099 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3100 /* store matrix indices and values */ 3101 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3102 PetscCall(PetscFree(rowidxs)); 3103 PetscCall(PetscFree2(colidxs, matvals)); 3104 PetscFunctionReturn(PETSC_SUCCESS); 3105 } 3106 3107 /* Not scalable because of ISAllGather() unless getting all columns. */ 3108 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3109 { 3110 IS iscol_local; 3111 PetscBool isstride; 3112 PetscMPIInt gisstride = 0; 3113 3114 PetscFunctionBegin; 3115 /* check if we are grabbing all columns*/ 3116 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3117 3118 if (isstride) { 3119 PetscInt start, len, mstart, mlen; 3120 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3121 PetscCall(ISGetLocalSize(iscol, &len)); 3122 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3123 if (mstart == start && mlen - mstart == len) gisstride = 1; 3124 } 3125 3126 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3127 if (gisstride) { 3128 PetscInt N; 3129 PetscCall(MatGetSize(mat, NULL, &N)); 3130 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3131 PetscCall(ISSetIdentity(iscol_local)); 3132 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3133 } else { 3134 PetscInt cbs; 3135 PetscCall(ISGetBlockSize(iscol, &cbs)); 3136 PetscCall(ISAllGather(iscol, &iscol_local)); 3137 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3138 } 3139 3140 *isseq = iscol_local; 3141 PetscFunctionReturn(PETSC_SUCCESS); 3142 } 3143 3144 /* 3145 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3146 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3147 3148 Input Parameters: 3149 + mat - matrix 3150 . 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3151 i.e., mat->rstart <= isrow[i] < mat->rend 3152 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3153 i.e., mat->cstart <= iscol[i] < mat->cend 3154 3155 Output Parameters: 3156 + isrow_d - sequential row index set for retrieving mat->A 3157 . iscol_d - sequential column index set for retrieving mat->A 3158 . iscol_o - sequential column index set for retrieving mat->B 3159 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3160 */ 3161 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3162 { 3163 Vec x, cmap; 3164 const PetscInt *is_idx; 3165 PetscScalar *xarray, *cmaparray; 3166 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3167 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3168 Mat B = a->B; 3169 Vec lvec = a->lvec, lcmap; 3170 PetscInt i, cstart, cend, Bn = B->cmap->N; 3171 MPI_Comm comm; 3172 VecScatter Mvctx = a->Mvctx; 3173 3174 PetscFunctionBegin; 3175 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3176 PetscCall(ISGetLocalSize(iscol, &ncols)); 3177 3178 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3179 PetscCall(MatCreateVecs(mat, &x, NULL)); 3180 PetscCall(VecSet(x, -1.0)); 3181 PetscCall(VecDuplicate(x, &cmap)); 3182 PetscCall(VecSet(cmap, -1.0)); 3183 3184 /* Get start indices */ 3185 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3186 isstart -= ncols; 3187 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3188 3189 PetscCall(ISGetIndices(iscol, &is_idx)); 3190 PetscCall(VecGetArray(x, &xarray)); 3191 PetscCall(VecGetArray(cmap, &cmaparray)); 3192 PetscCall(PetscMalloc1(ncols, &idx)); 3193 for (i = 0; i < ncols; i++) { 3194 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3195 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3196 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3197 } 3198 PetscCall(VecRestoreArray(x, &xarray)); 3199 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3200 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3201 3202 /* Get iscol_d */ 3203 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3204 PetscCall(ISGetBlockSize(iscol, &i)); 3205 PetscCall(ISSetBlockSize(*iscol_d, i)); 3206 3207 /* Get isrow_d */ 3208 PetscCall(ISGetLocalSize(isrow, &m)); 3209 rstart = mat->rmap->rstart; 3210 PetscCall(PetscMalloc1(m, &idx)); 3211 PetscCall(ISGetIndices(isrow, &is_idx)); 3212 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3213 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3214 3215 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3216 PetscCall(ISGetBlockSize(isrow, &i)); 3217 PetscCall(ISSetBlockSize(*isrow_d, i)); 3218 3219 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3220 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3221 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3222 3223 PetscCall(VecDuplicate(lvec, &lcmap)); 3224 3225 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3226 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3227 3228 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3229 /* off-process column 
indices */ 3230 count = 0; 3231 PetscCall(PetscMalloc1(Bn, &idx)); 3232 PetscCall(PetscMalloc1(Bn, &cmap1)); 3233 3234 PetscCall(VecGetArray(lvec, &xarray)); 3235 PetscCall(VecGetArray(lcmap, &cmaparray)); 3236 for (i = 0; i < Bn; i++) { 3237 if (PetscRealPart(xarray[i]) > -1.0) { 3238 idx[count] = i; /* local column index in off-diagonal part B */ 3239 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3240 count++; 3241 } 3242 } 3243 PetscCall(VecRestoreArray(lvec, &xarray)); 3244 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3245 3246 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3247 /* cannot ensure iscol_o has same blocksize as iscol! */ 3248 3249 PetscCall(PetscFree(idx)); 3250 *garray = cmap1; 3251 3252 PetscCall(VecDestroy(&x)); 3253 PetscCall(VecDestroy(&cmap)); 3254 PetscCall(VecDestroy(&lcmap)); 3255 PetscFunctionReturn(PETSC_SUCCESS); 3256 } 3257 3258 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3259 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3260 { 3261 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3262 Mat M = NULL; 3263 MPI_Comm comm; 3264 IS iscol_d, isrow_d, iscol_o; 3265 Mat Asub = NULL, Bsub = NULL; 3266 PetscInt n, count, M_size, N_size; 3267 3268 PetscFunctionBegin; 3269 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3270 3271 if (call == MAT_REUSE_MATRIX) { 3272 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3273 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3274 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3275 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3277 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3278 3279 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3280 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3281 3282 /* Update diagonal and off-diagonal portions of submat */ 3283 asub = (Mat_MPIAIJ *)(*submat)->data; 3284 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3285 PetscCall(ISGetLocalSize(iscol_o, &n)); 3286 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3287 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3288 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3289 3290 } else { /* call == MAT_INITIAL_MATRIX) */ 3291 PetscInt *garray, *garray_compact; 3292 PetscInt BsubN; 3293 3294 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3295 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3296 3297 /* Create local submatrices Asub and Bsub */ 3298 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3299 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3300 3301 // Compact garray so its not of size Bn 3302 PetscCall(ISGetSize(iscol_o, &count)); 3303 PetscCall(PetscMalloc1(count, &garray_compact)); 3304 PetscCall(PetscArraycpy(garray_compact, garray, count)); 3305 3306 /* Create submatrix M */ 3307 PetscCall(ISGetSize(isrow, &M_size)); 3308 PetscCall(ISGetSize(iscol, &N_size)); 3309 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M)); 3310 3311 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3312 asub = (Mat_MPIAIJ *)M->data; 3313 3314 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3315 n = asub->B->cmap->N; 3316 if (BsubN > n) { 3317 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3318 const PetscInt *idx; 3319 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3320 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3321 3322 PetscCall(PetscMalloc1(n, &idx_new)); 3323 j = 0; 3324 PetscCall(ISGetIndices(iscol_o, &idx)); 3325 for (i = 0; i < n; i++) { 3326 if (j >= BsubN) break; 3327 while (subgarray[i] > garray[j]) j++; 3328 3329 if (subgarray[i] == garray[j]) { 3330 idx_new[i] = idx[j++]; 3331 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3332 } 3333 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3334 3335 PetscCall(ISDestroy(&iscol_o)); 3336 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3337 3338 } else if (BsubN < n) { 3339 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3340 } 3341 3342 PetscCall(PetscFree(garray)); 3343 *submat = M; 3344 3345 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3346 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3347 PetscCall(ISDestroy(&isrow_d)); 3348 3349 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3350 PetscCall(ISDestroy(&iscol_d)); 3351 3352 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3353 PetscCall(ISDestroy(&iscol_o)); 3354 } 3355 PetscFunctionReturn(PETSC_SUCCESS); 3356 } 3357 3358 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3359 { 3360 IS iscol_local = NULL, isrow_d; 3361 PetscInt csize; 3362 PetscInt n, i, j, start, end; 3363 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3364 MPI_Comm comm; 3365 3366 PetscFunctionBegin; 3367 /* If isrow has same processor distribution as mat, 3368 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3369 if (call == MAT_REUSE_MATRIX) { 3370 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3371 if (isrow_d) { 3372 sameRowDist = PETSC_TRUE; 3373 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3374 } else { 3375 
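/* no isrow_d was composed on *newmat; check whether it was created through the SameRowDist (but not SameColDist) path, which composes "SubIScol" on the submatrix */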
PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3376 if (iscol_local) { 3377 sameRowDist = PETSC_TRUE; 3378 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3379 } 3380 } 3381 } else { 3382 /* Check if isrow has same processor distribution as mat */ 3383 sameDist[0] = PETSC_FALSE; 3384 PetscCall(ISGetLocalSize(isrow, &n)); 3385 if (!n) { 3386 sameDist[0] = PETSC_TRUE; 3387 } else { 3388 PetscCall(ISGetMinMax(isrow, &i, &j)); 3389 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3390 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3391 } 3392 3393 /* Check if iscol has same processor distribution as mat */ 3394 sameDist[1] = PETSC_FALSE; 3395 PetscCall(ISGetLocalSize(iscol, &n)); 3396 if (!n) { 3397 sameDist[1] = PETSC_TRUE; 3398 } else { 3399 PetscCall(ISGetMinMax(iscol, &i, &j)); 3400 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3401 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3402 } 3403 3404 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3405 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3406 sameRowDist = tsameDist[0]; 3407 } 3408 3409 if (sameRowDist) { 3410 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3411 /* isrow and iscol have same processor distribution as mat */ 3412 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3413 PetscFunctionReturn(PETSC_SUCCESS); 3414 } else { /* sameRowDist */ 3415 /* isrow has same processor distribution as mat */ 3416 if (call == MAT_INITIAL_MATRIX) { 3417 PetscBool sorted; 3418 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3419 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3420 PetscCall(ISGetSize(iscol, &i)); 3421 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3422 3423 PetscCall(ISSorted(iscol_local, &sorted)); 3424 if (sorted) { 3425 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3426 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3427 PetscFunctionReturn(PETSC_SUCCESS); 3428 } 3429 } else { /* call == MAT_REUSE_MATRIX */ 3430 IS iscol_sub; 3431 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3432 if (iscol_sub) { 3433 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3434 PetscFunctionReturn(PETSC_SUCCESS); 3435 } 3436 } 3437 } 3438 } 3439 3440 /* General case: iscol -> iscol_local which has global size of iscol */ 3441 if (call == MAT_REUSE_MATRIX) { 3442 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3443 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3444 } else { 3445 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3446 } 3447 3448 PetscCall(ISGetLocalSize(iscol, &csize)); 3449 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3450 3451 if (call == MAT_INITIAL_MATRIX) { 3452 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3453 PetscCall(ISDestroy(&iscol_local)); 3454 } 3455 PetscFunctionReturn(PETSC_SUCCESS); 3456 } 3457 3458 /*@C 3459 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using 
`MATSEQAIJ` matrices that contain the "diagonal" 3460 and "off-diagonal" part of the matrix in CSR format. 3461 3462 Collective 3463 3464 Input Parameters: 3465 + comm - MPI communicator 3466 . M - the global row size 3467 . N - the global column size 3468 . A - "diagonal" portion of matrix 3469 . B - if garray is `NULL`, B should be the offdiag matrix using global col ids and have N columns; if garray is not `NULL`, B should be the offdiag matrix using local col ids and have as many columns as entries in garray 3470 - garray - either `NULL` or the global index of `B` columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter. 3471 3472 Output Parameter: 3473 . mat - the matrix, with input `A` as its local diagonal matrix 3474 3475 Level: advanced 3476 3477 Notes: 3478 See `MatCreateAIJ()` for the definition of the "diagonal" and "off-diagonal" portions of the matrix. 3479 3480 `A` and `B` become part of the output mat. The user must not use `A` and `B` afterwards. 3481 3482 If `garray` is `NULL`, `B` will be compacted to use local indices. In this sense, `B`'s sparsity pattern (nonzerostate) will be changed. If `B` is a device matrix, we need to somehow also update 3483 `B`'s copy on device. We do so by increasing `B`'s nonzerostate. When `B` is used on device, device matrix types should detect this change (ref. internal routines `MatSeqAIJCUSPARSECopyToGPU()` or 3484 `MatAssemblyEnd_SeqAIJKokkos()`) and will simply destroy and then recreate the device copy of `B`. This is not optimal, but it is easy to implement and less hacky. To avoid this overhead, try to compute `garray` 3485 yourself; see the algorithms in the private function `MatSetUpMultiply_MPIAIJ()`. 3486 3487 The `NULL`-ness of `garray` does not need to be collective; in other words, `garray` can be `NULL` on some processes while not on others. 3488 3489 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3490 @*/ 3491 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat) 3492 { 3493 PetscInt m, n; 3494 MatType mpi_mat_type; 3495 Mat_MPIAIJ *mpiaij; 3496 Mat C; 3497 3498 PetscFunctionBegin; 3499 PetscCall(MatCreate(comm, &C)); 3500 PetscCall(MatGetSize(A, &m, &n)); 3501 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3502 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3503 3504 PetscCall(MatSetSizes(C, m, n, M, N)); 3505 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3506 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3507 PetscCall(MatSetType(C, mpi_mat_type)); 3508 if (!garray) { 3509 const PetscScalar *ba; 3510 3511 B->nonzerostate++; 3512 PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */ 3513 PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 3514 } 3515 3516 PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs)); 3517 PetscCall(PetscLayoutSetUp(C->rmap)); 3518 PetscCall(PetscLayoutSetUp(C->cmap)); 3519 3520 mpiaij = (Mat_MPIAIJ *)C->data; 3521 mpiaij->A = A; 3522 mpiaij->B = B; 3523 mpiaij->garray = garray; 3524 C->preallocated = PETSC_TRUE; 3525 C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ.
In effect, making MatAssemblyBegin a nop */ 3526 3527 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3528 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3529 /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and 3530 also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced 3531 */ 3532 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3533 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3534 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3535 *mat = C; 3536 PetscFunctionReturn(PETSC_SUCCESS); 3537 } 3538 3539 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3540 3541 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3542 { 3543 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3544 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3545 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3546 Mat M, Msub, B = a->B; 3547 MatScalar *aa; 3548 Mat_SeqAIJ *aij; 3549 PetscInt *garray = a->garray, *colsub, Ncols; 3550 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3551 IS iscol_sub, iscmap; 3552 const PetscInt *is_idx, *cmap; 3553 PetscBool allcolumns = PETSC_FALSE; 3554 MPI_Comm comm; 3555 3556 PetscFunctionBegin; 3557 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3558 if (call == MAT_REUSE_MATRIX) { 3559 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3560 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3561 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3562 3563 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3564 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3565 3566 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3567 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3568 3569 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3570 3571 } else { /* call == MAT_INITIAL_MATRIX) */ 3572 PetscBool flg; 3573 3574 PetscCall(ISGetLocalSize(iscol, &n)); 3575 PetscCall(ISGetSize(iscol, &Ncols)); 3576 3577 /* (1) iscol -> nonscalable iscol_local */ 3578 /* Check for special case: each processor gets entire matrix columns */ 3579 PetscCall(ISIdentity(iscol_local, &flg)); 3580 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3581 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3582 if (allcolumns) { 3583 iscol_sub = iscol_local; 3584 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3585 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3586 3587 } else { 3588 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3589 PetscInt *idx, *cmap1, k; 3590 PetscCall(PetscMalloc1(Ncols, &idx)); 3591 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3592 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3593 count = 0; 3594 k = 0; 3595 for (i = 0; i < Ncols; i++) { 3596 j = is_idx[i]; 3597 if (j >= cstart && j < cend) { 3598 /* diagonal part of mat */ 3599 idx[count] = j; 3600 cmap1[count++] = i; /* column index in submat */ 3601 } else if (Bn) { 3602 /* off-diagonal part of mat */ 3603 if (j == garray[k]) { 3604 idx[count] = j; 3605 cmap1[count++] = i; /* column index in submat */ 3606 } else if (j > garray[k]) { 3607 while (j > garray[k] && k < Bn - 1) k++; 3608 if (j == garray[k]) { 3609 idx[count] = j; 3610 cmap1[count++] = i; /* column index in submat */ 3611 } 3612 } 3613 } 3614 } 3615 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3616 3617 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3618 PetscCall(ISGetBlockSize(iscol, &cbs)); 3619 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3620 3621 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3622 } 3623 3624 /* (3) Create sequential Msub */ 3625 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3626 } 3627 3628 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3629 aij = (Mat_SeqAIJ *)Msub->data; 3630 ii = aij->i; 3631 PetscCall(ISGetIndices(iscmap, &cmap)); 3632 3633 /* 3634 m - number of local rows 3635 Ncols - number of columns (same on all processors) 3636 rstart - first row in new global matrix generated 3637 */ 3638 PetscCall(MatGetSize(Msub, &m, NULL)); 3639 3640 if (call == MAT_INITIAL_MATRIX) { 3641 /* (4) Create parallel newmat */ 3642 PetscMPIInt rank, size; 3643 PetscInt csize; 3644 3645 PetscCallMPI(MPI_Comm_size(comm, &size)); 3646 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3647 3648 /* 3649 Determine the number of non-zeros in the diagonal and off-diagonal 3650 portions of the matrix in order to do correct preallocation 3651 */ 3652 3653 /* first get start and end of "diagonal" columns */ 3654 PetscCall(ISGetLocalSize(iscol, &csize)); 3655 if (csize == PETSC_DECIDE) { 3656 PetscCall(ISGetSize(isrow, &mglobal)); 3657 if (mglobal == Ncols) { /* square matrix */ 3658 nlocal = m; 3659 } else { 3660 nlocal = Ncols / size + ((Ncols % size) > rank); 3661 } 3662 } else { 3663 nlocal = csize; 3664 } 3665 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3666 rstart = rend - nlocal; 3667 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3668 3669 /* next, compute all the lengths */ 3670 jj = aij->j; 3671 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3672 olens = dlens + m; 3673 for (i = 0; i < m; i++) { 3674 jend = ii[i + 1] - ii[i]; 3675 olen = 0; 3676 dlen = 0; 3677 for (j = 0; j < jend; j++) { 3678 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3679 else dlen++; 3680 jj++; 3681 } 3682 olens[i] = olen; 3683 dlens[i] = dlen; 3684 } 3685 3686 PetscCall(ISGetBlockSize(isrow, &bs)); 3687 PetscCall(ISGetBlockSize(iscol, &cbs)); 3688 3689 PetscCall(MatCreate(comm, &M)); 3690 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3691 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3692 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3693 
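/* exact per-row preallocation: dlens/olens computed above hold the number of diagonal and off-diagonal nonzeros of each local row of the new matrix */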
PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3694 PetscCall(PetscFree(dlens)); 3695 3696 } else { /* call == MAT_REUSE_MATRIX */ 3697 M = *newmat; 3698 PetscCall(MatGetLocalSize(M, &i, NULL)); 3699 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3700 PetscCall(MatZeroEntries(M)); 3701 /* 3702 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3703 rather than the slower MatSetValues(). 3704 */ 3705 M->was_assembled = PETSC_TRUE; 3706 M->assembled = PETSC_FALSE; 3707 } 3708 3709 /* (5) Set values of Msub to *newmat */ 3710 PetscCall(PetscMalloc1(count, &colsub)); 3711 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3712 3713 jj = aij->j; 3714 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3715 for (i = 0; i < m; i++) { 3716 row = rstart + i; 3717 nz = ii[i + 1] - ii[i]; 3718 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3719 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3720 jj += nz; 3721 aa += nz; 3722 } 3723 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3724 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3725 3726 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3727 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3728 3729 PetscCall(PetscFree(colsub)); 3730 3731 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3732 if (call == MAT_INITIAL_MATRIX) { 3733 *newmat = M; 3734 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3735 PetscCall(MatDestroy(&Msub)); 3736 3737 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3738 PetscCall(ISDestroy(&iscol_sub)); 3739 3740 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3741 PetscCall(ISDestroy(&iscmap)); 3742 3743 if (iscol_local) { 3744 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3745 PetscCall(ISDestroy(&iscol_local)); 3746 } 3747 } 3748 PetscFunctionReturn(PETSC_SUCCESS); 3749 } 3750 3751 /* 3752 Not great since it makes two copies of the submatrix, first an SeqAIJ 3753 in local and then by concatenating the local matrices the end result. 3754 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3755 3756 This requires a sequential iscol with all indices. 
3757 */ 3758 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3759 { 3760 PetscMPIInt rank, size; 3761 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3762 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3763 Mat M, Mreuse; 3764 MatScalar *aa, *vwork; 3765 MPI_Comm comm; 3766 Mat_SeqAIJ *aij; 3767 PetscBool colflag, allcolumns = PETSC_FALSE; 3768 3769 PetscFunctionBegin; 3770 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3771 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3772 PetscCallMPI(MPI_Comm_size(comm, &size)); 3773 3774 /* Check for special case: each processor gets entire matrix columns */ 3775 PetscCall(ISIdentity(iscol, &colflag)); 3776 PetscCall(ISGetLocalSize(iscol, &n)); 3777 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3778 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3779 3780 if (call == MAT_REUSE_MATRIX) { 3781 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3782 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3783 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3784 } else { 3785 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3786 } 3787 3788 /* 3789 m - number of local rows 3790 n - number of columns (same on all processors) 3791 rstart - first row in new global matrix generated 3792 */ 3793 PetscCall(MatGetSize(Mreuse, &m, &n)); 3794 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3795 if (call == MAT_INITIAL_MATRIX) { 3796 aij = (Mat_SeqAIJ *)Mreuse->data; 3797 ii = aij->i; 3798 jj = aij->j; 3799 3800 /* 3801 Determine the number of non-zeros in the diagonal and off-diagonal 3802 portions of the matrix in order to do correct preallocation 3803 */ 3804 3805 /* first get start and end of "diagonal" columns */ 3806 if (csize == PETSC_DECIDE) { 3807 PetscCall(ISGetSize(isrow, &mglobal)); 3808 if (mglobal == n) { /* square matrix */ 3809 nlocal = m; 3810 } else { 3811 nlocal = n / size + ((n % size) > rank); 3812 } 3813 } else { 3814 nlocal = csize; 3815 } 3816 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3817 rstart = rend - nlocal; 3818 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3819 3820 /* next, compute all the lengths */ 3821 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3822 olens = dlens + m; 3823 for (i = 0; i < m; i++) { 3824 jend = ii[i + 1] - ii[i]; 3825 olen = 0; 3826 dlen = 0; 3827 for (j = 0; j < jend; j++) { 3828 if (*jj < rstart || *jj >= rend) olen++; 3829 else dlen++; 3830 jj++; 3831 } 3832 olens[i] = olen; 3833 dlens[i] = dlen; 3834 } 3835 PetscCall(MatCreate(comm, &M)); 3836 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3837 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3838 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3839 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3840 PetscCall(PetscFree(dlens)); 3841 } else { 3842 PetscInt ml, nl; 3843 3844 M = *newmat; 3845 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3846 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3847 PetscCall(MatZeroEntries(M)); 3848 /* 3849 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3850 rather than the slower MatSetValues(). 3851 */ 3852 M->was_assembled = PETSC_TRUE; 3853 M->assembled = PETSC_FALSE; 3854 } 3855 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3856 aij = (Mat_SeqAIJ *)Mreuse->data; 3857 ii = aij->i; 3858 jj = aij->j; 3859 3860 /* trigger copy to CPU if needed */ 3861 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3862 for (i = 0; i < m; i++) { 3863 row = rstart + i; 3864 nz = ii[i + 1] - ii[i]; 3865 cwork = jj; 3866 jj = PetscSafePointerPlusOffset(jj, nz); 3867 vwork = aa; 3868 aa = PetscSafePointerPlusOffset(aa, nz); 3869 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3870 } 3871 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3872 3873 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3874 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3875 *newmat = M; 3876 3877 /* save submatrix used in processor for next request */ 3878 if (call == MAT_INITIAL_MATRIX) { 3879 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3880 PetscCall(MatDestroy(&Mreuse)); 3881 } 3882 PetscFunctionReturn(PETSC_SUCCESS); 3883 } 3884 3885 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3886 { 3887 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3888 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3889 const PetscInt *JJ; 3890 PetscBool nooffprocentries; 3891 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3892 3893 PetscFunctionBegin; 3894 PetscCall(PetscLayoutSetUp(B->rmap)); 3895 PetscCall(PetscLayoutSetUp(B->cmap)); 3896 m = B->rmap->n; 3897 cstart = B->cmap->rstart; 3898 cend = B->cmap->rend; 3899 rstart = B->rmap->rstart; 3900 irstart = Ii[0]; 3901 3902 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3903 3904 if (PetscDefined(USE_DEBUG)) { 3905 for (i = 0; i < m; i++) { 3906 nnz = Ii[i + 1] - Ii[i]; 3907 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3908 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3909 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3910 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3911 } 3912 } 3913 3914 for (i = 0; i < m; i++) { 3915 nnz = Ii[i + 1] - Ii[i]; 3916 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3917 nnz_max = PetscMax(nnz_max, nnz); 3918 d = 0; 3919 for (j = 0; j < nnz; j++) { 3920 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3921 } 3922 d_nnz[i] = d; 3923 o_nnz[i] = nnz - d; 3924 } 3925 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3926 PetscCall(PetscFree2(d_nnz, o_nnz)); 3927 3928 for (i = 0; i < m; i++) { 3929 ii = i + rstart; 3930 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3931 } 3932 nooffprocentries = B->nooffprocentries; 3933 B->nooffprocentries = PETSC_TRUE; 3934 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3935 
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3936 B->nooffprocentries = nooffprocentries; 3937 3938 /* count number of entries below block diagonal */ 3939 PetscCall(PetscFree(Aij->ld)); 3940 PetscCall(PetscCalloc1(m, &ld)); 3941 Aij->ld = ld; 3942 for (i = 0; i < m; i++) { 3943 nnz = Ii[i + 1] - Ii[i]; 3944 j = 0; 3945 while (j < nnz && J[j] < cstart) j++; 3946 ld[i] = j; 3947 if (J) J += nnz; 3948 } 3949 3950 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3951 PetscFunctionReturn(PETSC_SUCCESS); 3952 } 3953 3954 /*@ 3955 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3956 (the default parallel PETSc format). 3957 3958 Collective 3959 3960 Input Parameters: 3961 + B - the matrix 3962 . i - the indices into `j` for the start of each local row (indices start with zero) 3963 . j - the column indices for each local row (indices start with zero) 3964 - v - optional values in the matrix 3965 3966 Level: developer 3967 3968 Notes: 3969 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3970 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3971 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3972 3973 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3974 3975 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3976 3977 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3978 3979 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3980 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3981 3982 The format which is used for the sparse matrix input, is equivalent to a 3983 row-major ordering.. i.e for the following matrix, the input data expected is 3984 as shown 3985 .vb 3986 1 0 0 3987 2 0 3 P0 3988 ------- 3989 4 5 6 P1 3990 3991 Process0 [P0] rows_owned=[0,1] 3992 i = {0,1,3} [size = nrow+1 = 2+1] 3993 j = {0,0,2} [size = 3] 3994 v = {1,2,3} [size = 3] 3995 3996 Process1 [P1] rows_owned=[2] 3997 i = {0,3} [size = nrow+1 = 1+1] 3998 j = {0,1,2} [size = 3] 3999 v = {4,5,6} [size = 3] 4000 .ve 4001 4002 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4003 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4004 @*/ 4005 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4006 { 4007 PetscFunctionBegin; 4008 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4009 PetscFunctionReturn(PETSC_SUCCESS); 4010 } 4011 4012 /*@ 4013 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4014 (the default parallel PETSc format). For good matrix assembly performance 4015 the user should preallocate the matrix storage by setting the parameters 4016 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4017 4018 Collective 4019 4020 Input Parameters: 4021 + B - the matrix 4022 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4023 (same value is used for all local rows) 4024 . d_nnz - array containing the number of nonzeros in the various rows of the 4025 DIAGONAL portion of the local submatrix (possibly different for each row) 4026 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4027 The size of this array is equal to the number of local rows, i.e., 'm'. 4028 For matrices that will be factored, you must leave room for (and set) 4029 the diagonal entry even if it is zero. 4030 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4031 submatrix (same value is used for all local rows). 4032 - o_nnz - array containing the number of nonzeros in the various rows of the 4033 OFF-DIAGONAL portion of the local submatrix (possibly different for 4034 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4035 structure. The size of this array is equal to the number 4036 of local rows, i.e., 'm'. 4037 4038 Example Usage: 4039 Consider the following 8x8 matrix with 34 non-zero values, that is 4040 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4041 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4042 as follows 4043 4044 .vb 4045 1 2 0 | 0 3 0 | 0 4 4046 Proc0 0 5 6 | 7 0 0 | 8 0 4047 9 0 10 | 11 0 0 | 12 0 4048 ------------------------------------- 4049 13 0 14 | 15 16 17 | 0 0 4050 Proc1 0 18 0 | 19 20 21 | 0 0 4051 0 0 0 | 22 23 0 | 24 0 4052 ------------------------------------- 4053 Proc2 25 26 27 | 0 0 28 | 29 0 4054 30 0 0 | 31 32 33 | 0 34 4055 .ve 4056 4057 This can be represented as a collection of submatrices as 4058 .vb 4059 A B C 4060 D E F 4061 G H I 4062 .ve 4063 4064 Here the submatrices A,B,C are owned by proc0, D,E,F are 4065 owned by proc1, G,H,I are owned by proc2. 4066 4067 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4068 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4069 The 'M','N' parameters are 8,8, and have the same values on all procs. 4070 4071 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4072 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4073 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4074 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4075 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4076 matrix, and [DF] as another `MATSEQAIJ` matrix. 4077 4078 When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4079 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4080 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4081 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4082 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4083 In this case, the values of `d_nz`, `o_nz` are 4084 .vb 4085 proc0 d_nz = 2, o_nz = 2 4086 proc1 d_nz = 3, o_nz = 2 4087 proc2 d_nz = 1, o_nz = 4 4088 .ve 4089 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4090 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4091 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4092 34 values. 4093 4094 When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4095 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4096 In the above case the values for `d_nnz`, `o_nnz` are 4097 .vb 4098 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4099 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4100 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4101 .ve 4102 Here the space allocated is the sum of all the above values, i.e., 34, and 4103 hence the preallocation is perfect. 4104 4105 Level: intermediate 4106 4107 Notes: 4108 If the *_nnz parameter is given then the *_nz parameter is ignored. 4109 4110 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4111 storage. The stored row and column indices begin with zero. 4112 See [Sparse Matrices](sec_matsparse) for details. 4113 4114 The parallel matrix is partitioned such that the first m0 rows belong to 4115 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4116 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 4117 4118 The DIAGONAL portion of the local submatrix of a processor can be defined 4119 as the submatrix which is obtained by extracting the part corresponding to 4120 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4121 first row that belongs to the processor, r2 is the last row belonging to 4122 this processor, and c1-c2 is the range of indices of the local part of a 4123 vector suitable for applying the matrix to. This is an mxn matrix. In the 4124 common case of a square matrix, the row and column ranges are the same and 4125 the DIAGONAL part is also square. The remaining portion of the local 4126 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 4127 4128 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4129 4130 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4131 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4132 You can also run with the option `-info` and look for messages with the string 4133 malloc in them to see if additional memory allocation was needed. 4134 4135 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4136 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4137 @*/ 4138 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4139 { 4140 PetscFunctionBegin; 4141 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4142 PetscValidType(B, 1); 4143 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4144 PetscFunctionReturn(PETSC_SUCCESS); 4145 } 4146 4147 /*@ 4148 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4149 CSR format. 4150 4151 Collective 4152 4153 Input Parameters: 4154 + comm - MPI communicator 4155 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4156 . n - This value should be the same as the local size used in creating the 4157 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4158 calculated if `N` is given) For square matrices n is almost always `m`. 4159 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4160 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4161 .
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4162 . j - global column indices 4163 - a - optional matrix values 4164 4165 Output Parameter: 4166 . mat - the matrix 4167 4168 Level: intermediate 4169 4170 Notes: 4171 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4172 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4173 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4174 4175 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4176 4177 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4178 4179 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4180 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4181 4182 The format which is used for the sparse matrix input, is equivalent to a 4183 row-major ordering, i.e., for the following matrix, the input data expected is 4184 as shown 4185 .vb 4186 1 0 0 4187 2 0 3 P0 4188 ------- 4189 4 5 6 P1 4190 4191 Process0 [P0] rows_owned=[0,1] 4192 i = {0,1,3} [size = nrow+1 = 2+1] 4193 j = {0,0,2} [size = 3] 4194 v = {1,2,3} [size = 3] 4195 4196 Process1 [P1] rows_owned=[2] 4197 i = {0,3} [size = nrow+1 = 1+1] 4198 j = {0,1,2} [size = 3] 4199 v = {4,5,6} [size = 3] 4200 .ve 4201 4202 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4203 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4204 @*/ 4205 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4206 { 4207 PetscFunctionBegin; 4208 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4209 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4210 PetscCall(MatCreate(comm, mat)); 4211 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4212 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4213 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4214 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4215 PetscFunctionReturn(PETSC_SUCCESS); 4216 } 4217 4218 /*@ 4219 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4220 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4221 from `MatCreateMPIAIJWithArrays()` 4222 4223 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4224 4225 Collective 4226 4227 Input Parameters: 4228 + mat - the matrix 4229 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4230 . n - This value should be the same as the local size used in creating the 4231 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4232 calculated if N is given) For square matrices n is almost always m. 4233 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4234 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4235 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4236 . J - column indices 4237 - v - matrix values 4238 4239 Level: deprecated 4240 4241 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4242 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4243 @*/ 4244 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4245 { 4246 PetscInt nnz, i; 4247 PetscBool nooffprocentries; 4248 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4249 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4250 PetscScalar *ad, *ao; 4251 PetscInt ldi, Iii, md; 4252 const PetscInt *Adi = Ad->i; 4253 PetscInt *ld = Aij->ld; 4254 4255 PetscFunctionBegin; 4256 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4257 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4258 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4259 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4260 4261 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4262 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4263 4264 for (i = 0; i < m; i++) { 4265 if (PetscDefined(USE_DEBUG)) { 4266 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4267 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4268 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4269 } 4270 } 4271 nnz = Ii[i + 1] - Ii[i]; 4272 Iii = Ii[i]; 4273 ldi = ld[i]; 4274 md = Adi[i + 1] - Adi[i]; 4275 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4276 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4277 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4278 ad += md; 4279 ao += nnz - md; 4280 } 4281 nooffprocentries = mat->nooffprocentries; 4282 mat->nooffprocentries = PETSC_TRUE; 4283 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4284 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4285 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4286 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4287 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4288 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4289 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4290 mat->nooffprocentries = nooffprocentries; 4291 PetscFunctionReturn(PETSC_SUCCESS); 4292 } 4293 4294 /*@ 4295 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4296 4297 Collective 4298 4299 Input Parameters: 4300 + mat - the matrix 4301 - v - matrix values, stored by row 4302 4303 Level: intermediate 4304 4305 Notes: 4306 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4307 4308 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4309 4310 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4311 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4312 @*/ 4313 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4314 { 4315 PetscInt nnz, i, m; 4316 PetscBool nooffprocentries; 4317 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4318 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4319 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4320 PetscScalar *ad, *ao; 4321 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4322 PetscInt ldi, Iii, md; 4323 PetscInt *ld = Aij->ld; 4324 4325 PetscFunctionBegin; 4326 m = mat->rmap->n; 4327 4328 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4329 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4330 Iii = 0; 4331 for (i = 0; i < m; i++) { 4332 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4333 ldi = ld[i]; 4334 md = Adi[i + 1] - Adi[i]; 4335 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4336 ad += md; 4337 if (ao) { 4338 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4339 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4340 ao += nnz - md; 4341 } 4342 Iii += nnz; 4343 } 4344 nooffprocentries = mat->nooffprocentries; 4345 mat->nooffprocentries = PETSC_TRUE; 4346 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4347 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4348 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4349 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4350 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4351 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4352 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4353 mat->nooffprocentries = nooffprocentries; 4354 PetscFunctionReturn(PETSC_SUCCESS); 4355 } 4356 4357 /*@ 4358 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4359 (the default parallel PETSc format). For good matrix assembly performance 4360 the user should preallocate the matrix storage by setting the parameters 4361 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4362 4363 Collective 4364 4365 Input Parameters: 4366 + comm - MPI communicator 4367 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4368 This value should be the same as the local size used in creating the 4369 y vector for the matrix-vector product y = Ax. 4370 . n - This value should be the same as the local size used in creating the 4371 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4372 calculated if N is given) For square matrices n is almost always m. 4373 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4374 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4375 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4376 (same value is used for all local rows) 4377 . d_nnz - array containing the number of nonzeros in the various rows of the 4378 DIAGONAL portion of the local submatrix (possibly different for each row) 4379 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4380 The size of this array is equal to the number of local rows, i.e 'm'. 4381 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4382 submatrix (same value is used for all local rows). 4383 - o_nnz - array containing the number of nonzeros in the various rows of the 4384 OFF-DIAGONAL portion of the local submatrix (possibly different for 4385 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4386 structure. The size of this array is equal to the number 4387 of local rows, i.e 'm'. 4388 4389 Output Parameter: 4390 . A - the matrix 4391 4392 Options Database Keys: 4393 + -mat_no_inode - Do not use inodes 4394 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4395 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4396 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4397 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4398 4399 Level: intermediate 4400 4401 Notes: 4402 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4403 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4404 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4405 4406 If the *_nnz parameter is given then the *_nz parameter is ignored 4407 4408 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4409 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4410 storage requirements for this matrix. 4411 4412 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4413 processor than it must be used on all processors that share the object for 4414 that argument. 4415 4416 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4417 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4418 4419 The user MUST specify either the local or global matrix dimensions 4420 (possibly both). 4421 4422 The parallel matrix is partitioned across processors such that the 4423 first `m0` rows belong to process 0, the next `m1` rows belong to 4424 process 1, the next `m2` rows belong to process 2, etc., where 4425 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4426 values corresponding to [m x N] submatrix. 4427 4428 The columns are logically partitioned with the n0 columns belonging 4429 to 0th partition, the next n1 columns belonging to the next 4430 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4431 4432 The DIAGONAL portion of the local submatrix on any given processor 4433 is the submatrix corresponding to the rows and columns m,n 4434 corresponding to the given processor. i.e diagonal matrix on 4435 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4436 etc. The remaining portion of the local submatrix [m x (N-n)] 4437 constitute the OFF-DIAGONAL portion. The example below better 4438 illustrates this concept. The two matrices, the DIAGONAL portion and 4439 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  When calling this routine with a single process communicator, a matrix of
  type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
  type of communicator, use the construction mechanism
.vb
  MatCreate(..., &A);
  MatSetType(A, MATMPIAIJ);
  MatSetSizes(A, m, n, M, N);
  MatMPIAIJSetPreallocation(A, ...);
.ve

  By default, this format uses inodes (identical nodes) when possible.
  We search for consecutive rows with the same nonzero structure, thereby
  reusing matrix information to achieve increased efficiency.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
           -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
           -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, and G,H,I are owned by proc2.

  The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
  The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
  The 'M','N' parameters are 8, 8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all of the above values, i.e., 34, and
  hence the preallocation is perfect.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
          `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not Collective

  Input Parameter:
. A - The `MATMPIAIJ` matrix

  Output Parameters:
+ Ad     - The local diagonal block as a `MATSEQAIJ` matrix
. Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

  Level: intermediate

  Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
  local column numbers to global column numbers in the original matrix.
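  Example Usage:
  A minimal sketch of inspecting the two local blocks (assuming `A` is an already assembled `MATMPIAIJ`; the variable names are illustrative) is
.vb
  Mat             Ad, Ao;
  const PetscInt *colmap;
  PetscInt        nd, no;

  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap));
  PetscCall(MatGetSize(Ad, NULL, &nd)); // Nc, the number of local columns
  PetscCall(MatGetSize(Ao, NULL, &no)); // Nco; colmap[0], ..., colmap[no-1] are the global columns of Ao
.ve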
4572 4573 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4574 @*/ 4575 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4576 { 4577 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4578 PetscBool flg; 4579 4580 PetscFunctionBegin; 4581 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4582 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4583 if (Ad) *Ad = a->A; 4584 if (Ao) *Ao = a->B; 4585 if (colmap) *colmap = a->garray; 4586 PetscFunctionReturn(PETSC_SUCCESS); 4587 } 4588 4589 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4590 { 4591 PetscInt m, N, i, rstart, nnz, Ii; 4592 PetscInt *indx; 4593 PetscScalar *values; 4594 MatType rootType; 4595 4596 PetscFunctionBegin; 4597 PetscCall(MatGetSize(inmat, &m, &N)); 4598 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4599 PetscInt *dnz, *onz, sum, bs, cbs; 4600 4601 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4602 /* Check sum(n) = N */ 4603 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4604 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4605 4606 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4607 rstart -= m; 4608 4609 MatPreallocateBegin(comm, m, n, dnz, onz); 4610 for (i = 0; i < m; i++) { 4611 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4612 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4613 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4614 } 4615 4616 PetscCall(MatCreate(comm, outmat)); 4617 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4618 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4619 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4620 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4621 PetscCall(MatSetType(*outmat, rootType)); 4622 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4623 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4624 MatPreallocateEnd(dnz, onz); 4625 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4626 } 4627 4628 /* numeric phase */ 4629 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4630 for (i = 0; i < m; i++) { 4631 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4632 Ii = i + rstart; 4633 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4634 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4635 } 4636 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4637 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4638 PetscFunctionReturn(PETSC_SUCCESS); 4639 } 4640 4641 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4642 { 4643 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4644 4645 PetscFunctionBegin; 4646 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4647 PetscCall(PetscFree(merge->id_r)); 4648 PetscCall(PetscFree(merge->len_s)); 4649 PetscCall(PetscFree(merge->len_r)); 4650 PetscCall(PetscFree(merge->bi)); 4651 PetscCall(PetscFree(merge->bj)); 4652 PetscCall(PetscFree(merge->buf_ri[0])); 4653 PetscCall(PetscFree(merge->buf_ri)); 4654 PetscCall(PetscFree(merge->buf_rj[0])); 4655 PetscCall(PetscFree(merge->buf_rj)); 4656 
PetscCall(PetscFree(merge->coi)); 4657 PetscCall(PetscFree(merge->coj)); 4658 PetscCall(PetscFree(merge->owners_co)); 4659 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4660 PetscCall(PetscFree(merge)); 4661 PetscFunctionReturn(PETSC_SUCCESS); 4662 } 4663 4664 #include <../src/mat/utils/freespace.h> 4665 #include <petscbt.h> 4666 4667 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4668 { 4669 MPI_Comm comm; 4670 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4671 PetscMPIInt size, rank, taga, *len_s; 4672 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4673 PetscMPIInt proc, k; 4674 PetscInt **buf_ri, **buf_rj; 4675 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4676 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4677 MPI_Request *s_waits, *r_waits; 4678 MPI_Status *status; 4679 const MatScalar *aa, *a_a; 4680 MatScalar **abuf_r, *ba_i; 4681 Mat_Merge_SeqsToMPI *merge; 4682 PetscContainer container; 4683 4684 PetscFunctionBegin; 4685 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4686 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4687 4688 PetscCallMPI(MPI_Comm_size(comm, &size)); 4689 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4690 4691 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4692 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4693 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4694 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4695 aa = a_a; 4696 4697 bi = merge->bi; 4698 bj = merge->bj; 4699 buf_ri = merge->buf_ri; 4700 buf_rj = merge->buf_rj; 4701 4702 PetscCall(PetscMalloc1(size, &status)); 4703 owners = merge->rowmap->range; 4704 len_s = merge->len_s; 4705 4706 /* send and recv matrix values */ 4707 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4708 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4709 4710 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4711 for (proc = 0, k = 0; proc < size; proc++) { 4712 if (!len_s[proc]) continue; 4713 i = owners[proc]; 4714 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4715 k++; 4716 } 4717 4718 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4719 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4720 PetscCall(PetscFree(status)); 4721 4722 PetscCall(PetscFree(s_waits)); 4723 PetscCall(PetscFree(r_waits)); 4724 4725 /* insert mat values of mpimat */ 4726 PetscCall(PetscMalloc1(N, &ba_i)); 4727 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4728 4729 for (k = 0; k < merge->nrecv; k++) { 4730 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4731 nrows = *buf_ri_k[k]; 4732 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4733 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4734 } 4735 4736 /* set values of ba */ 4737 m = merge->rowmap->n; 4738 for (i = 0; i < m; i++) { 4739 arow = owners[rank] + i; 4740 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4741 bnzi = bi[i + 1] - bi[i]; 4742 PetscCall(PetscArrayzero(ba_i, bnzi)); 4743 4744 /* add local non-zero vals of this proc's seqmat into ba */ 4745 anzi = ai[arow + 1] - ai[arow]; 4746 aj = a->j + ai[arow]; 4747 aa = 
a_a + ai[arow]; 4748 nextaj = 0; 4749 for (j = 0; nextaj < anzi; j++) { 4750 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4751 ba_i[j] += aa[nextaj++]; 4752 } 4753 } 4754 4755 /* add received vals into ba */ 4756 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4757 /* i-th row */ 4758 if (i == *nextrow[k]) { 4759 anzi = *(nextai[k] + 1) - *nextai[k]; 4760 aj = buf_rj[k] + *nextai[k]; 4761 aa = abuf_r[k] + *nextai[k]; 4762 nextaj = 0; 4763 for (j = 0; nextaj < anzi; j++) { 4764 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4765 ba_i[j] += aa[nextaj++]; 4766 } 4767 } 4768 nextrow[k]++; 4769 nextai[k]++; 4770 } 4771 } 4772 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4773 } 4774 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4775 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4776 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4777 4778 PetscCall(PetscFree(abuf_r[0])); 4779 PetscCall(PetscFree(abuf_r)); 4780 PetscCall(PetscFree(ba_i)); 4781 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4782 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4783 PetscFunctionReturn(PETSC_SUCCESS); 4784 } 4785 4786 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4787 { 4788 Mat B_mpi; 4789 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4790 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4791 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4792 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4793 PetscInt len, *dnz, *onz, bs, cbs; 4794 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4795 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4796 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4797 MPI_Status *status; 4798 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4799 PetscBT lnkbt; 4800 Mat_Merge_SeqsToMPI *merge; 4801 PetscContainer container; 4802 4803 PetscFunctionBegin; 4804 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4805 4806 /* make sure it is a PETSc comm */ 4807 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4808 PetscCallMPI(MPI_Comm_size(comm, &size)); 4809 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4810 4811 PetscCall(PetscNew(&merge)); 4812 PetscCall(PetscMalloc1(size, &status)); 4813 4814 /* determine row ownership */ 4815 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4816 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4817 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4818 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4819 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4820 PetscCall(PetscMalloc1(size, &len_si)); 4821 PetscCall(PetscMalloc1(size, &merge->len_s)); 4822 4823 m = merge->rowmap->n; 4824 owners = merge->rowmap->range; 4825 4826 /* determine the number of messages to send, their lengths */ 4827 len_s = merge->len_s; 4828 4829 len = 0; /* length of buf_si[] */ 4830 merge->nsend = 0; 4831 for (PetscMPIInt proc = 0; proc < size; proc++) { 4832 len_si[proc] = 0; 4833 if (proc == rank) { 4834 len_s[proc] = 0; 4835 } else { 4836 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4837 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4838 } 4839 if (len_s[proc]) { 4840 merge->nsend++; 4841 nrows = 0; 4842 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4843 if (ai[i + 1] > ai[i]) nrows++; 4844 } 
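      /* the i-structure message for [proc] packs one row count, nrows row indices, and an (nrows + 1)-entry
         offset array (see the packing of buf_si below), hence the 2*(nrows + 1) entries counted here */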
4845 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4846 len += len_si[proc]; 4847 } 4848 } 4849 4850 /* determine the number and length of messages to receive for ij-structure */ 4851 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4852 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4853 4854 /* post the Irecv of j-structure */ 4855 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4856 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4857 4858 /* post the Isend of j-structure */ 4859 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4860 4861 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4862 if (!len_s[proc]) continue; 4863 i = owners[proc]; 4864 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4865 k++; 4866 } 4867 4868 /* receives and sends of j-structure are complete */ 4869 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4870 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4871 4872 /* send and recv i-structure */ 4873 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4874 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4875 4876 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4877 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4878 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4879 if (!len_s[proc]) continue; 4880 /* form outgoing message for i-structure: 4881 buf_si[0]: nrows to be sent 4882 [1:nrows]: row index (global) 4883 [nrows+1:2*nrows+1]: i-structure index 4884 */ 4885 nrows = len_si[proc] / 2 - 1; 4886 buf_si_i = buf_si + nrows + 1; 4887 buf_si[0] = nrows; 4888 buf_si_i[0] = 0; 4889 nrows = 0; 4890 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4891 anzi = ai[i + 1] - ai[i]; 4892 if (anzi) { 4893 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4894 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4895 nrows++; 4896 } 4897 } 4898 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4899 k++; 4900 buf_si += len_si[proc]; 4901 } 4902 4903 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4904 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4905 4906 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4907 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4908 4909 PetscCall(PetscFree(len_si)); 4910 PetscCall(PetscFree(len_ri)); 4911 PetscCall(PetscFree(rj_waits)); 4912 PetscCall(PetscFree2(si_waits, sj_waits)); 4913 PetscCall(PetscFree(ri_waits)); 4914 PetscCall(PetscFree(buf_s)); 4915 PetscCall(PetscFree(status)); 4916 4917 /* compute a local seq matrix in each processor */ 4918 /* allocate bi array and free space for accumulating nonzero column info */ 4919 PetscCall(PetscMalloc1(m + 1, &bi)); 4920 bi[0] = 0; 4921 4922 /* create and initialize a linked list */ 4923 nlnk = N + 1; 4924 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4925 4926 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4927 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4928 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4929 4930 
current_space = free_space; 4931 4932 /* determine symbolic info for each local row */ 4933 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4934 4935 for (k = 0; k < merge->nrecv; k++) { 4936 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4937 nrows = *buf_ri_k[k]; 4938 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4939 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4940 } 4941 4942 MatPreallocateBegin(comm, m, n, dnz, onz); 4943 len = 0; 4944 for (i = 0; i < m; i++) { 4945 bnzi = 0; 4946 /* add local non-zero cols of this proc's seqmat into lnk */ 4947 arow = owners[rank] + i; 4948 anzi = ai[arow + 1] - ai[arow]; 4949 aj = a->j + ai[arow]; 4950 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4951 bnzi += nlnk; 4952 /* add received col data into lnk */ 4953 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4954 if (i == *nextrow[k]) { /* i-th row */ 4955 anzi = *(nextai[k] + 1) - *nextai[k]; 4956 aj = buf_rj[k] + *nextai[k]; 4957 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4958 bnzi += nlnk; 4959 nextrow[k]++; 4960 nextai[k]++; 4961 } 4962 } 4963 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4964 4965 /* if free space is not available, make more free space */ 4966 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4967 /* copy data into free space, then initialize lnk */ 4968 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4969 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 4970 4971 current_space->array += bnzi; 4972 current_space->local_used += bnzi; 4973 current_space->local_remaining -= bnzi; 4974 4975 bi[i + 1] = bi[i] + bnzi; 4976 } 4977 4978 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4979 4980 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 4981 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4982 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4983 4984 /* create symbolic parallel matrix B_mpi */ 4985 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4986 PetscCall(MatCreate(comm, &B_mpi)); 4987 if (n == PETSC_DECIDE) { 4988 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4989 } else { 4990 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4991 } 4992 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 4993 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 4994 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 4995 MatPreallocateEnd(dnz, onz); 4996 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 4997 4998 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4999 B_mpi->assembled = PETSC_FALSE; 5000 merge->bi = bi; 5001 merge->bj = bj; 5002 merge->buf_ri = buf_ri; 5003 merge->buf_rj = buf_rj; 5004 merge->coi = NULL; 5005 merge->coj = NULL; 5006 merge->owners_co = NULL; 5007 5008 PetscCall(PetscCommDestroy(&comm)); 5009 5010 /* attach the supporting struct to B_mpi for reuse */ 5011 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5012 PetscCall(PetscContainerSetPointer(container, merge)); 5013 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5014 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5015 PetscCall(PetscContainerDestroy(&container)); 5016 
*mpimat = B_mpi; 5017 5018 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5019 PetscFunctionReturn(PETSC_SUCCESS); 5020 } 5021 5022 /*@ 5023 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5024 matrices from each processor 5025 5026 Collective 5027 5028 Input Parameters: 5029 + comm - the communicators the parallel matrix will live on 5030 . seqmat - the input sequential matrices 5031 . m - number of local rows (or `PETSC_DECIDE`) 5032 . n - number of local columns (or `PETSC_DECIDE`) 5033 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5034 5035 Output Parameter: 5036 . mpimat - the parallel matrix generated 5037 5038 Level: advanced 5039 5040 Note: 5041 The dimensions of the sequential matrix in each processor MUST be the same. 5042 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5043 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5044 5045 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5046 @*/ 5047 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5048 { 5049 PetscMPIInt size; 5050 5051 PetscFunctionBegin; 5052 PetscCallMPI(MPI_Comm_size(comm, &size)); 5053 if (size == 1) { 5054 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5055 if (scall == MAT_INITIAL_MATRIX) { 5056 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5057 } else { 5058 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5059 } 5060 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5061 PetscFunctionReturn(PETSC_SUCCESS); 5062 } 5063 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5064 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5065 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5066 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5067 PetscFunctionReturn(PETSC_SUCCESS); 5068 } 5069 5070 /*@ 5071 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5072 5073 Not Collective 5074 5075 Input Parameter: 5076 . A - the matrix 5077 5078 Output Parameter: 5079 . A_loc - the local sequential matrix generated 5080 5081 Level: developer 5082 5083 Notes: 5084 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5085 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5086 `n` is the global column count obtained with `MatGetSize()` 5087 5088 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5089 5090 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5091 5092 Destroy the matrix with `MatDestroy()` 5093 5094 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5095 @*/ 5096 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5097 { 5098 PetscBool mpi; 5099 5100 PetscFunctionBegin; 5101 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5102 if (mpi) { 5103 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5104 } else { 5105 *A_loc = A; 5106 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5107 } 5108 PetscFunctionReturn(PETSC_SUCCESS); 5109 } 5110 5111 /*@ 5112 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 
5113 5114 Not Collective 5115 5116 Input Parameters: 5117 + A - the matrix 5118 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5119 5120 Output Parameter: 5121 . A_loc - the local sequential matrix generated 5122 5123 Level: developer 5124 5125 Notes: 5126 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5127 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5128 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5129 5130 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5131 5132 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5133 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5134 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5135 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5136 5137 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5138 @*/ 5139 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5140 { 5141 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5142 Mat_SeqAIJ *mat, *a, *b; 5143 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5144 const PetscScalar *aa, *ba, *aav, *bav; 5145 PetscScalar *ca, *cam; 5146 PetscMPIInt size; 5147 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5148 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5149 PetscBool match; 5150 5151 PetscFunctionBegin; 5152 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5153 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5154 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5155 if (size == 1) { 5156 if (scall == MAT_INITIAL_MATRIX) { 5157 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5158 *A_loc = mpimat->A; 5159 } else if (scall == MAT_REUSE_MATRIX) { 5160 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5161 } 5162 PetscFunctionReturn(PETSC_SUCCESS); 5163 } 5164 5165 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5166 a = (Mat_SeqAIJ *)mpimat->A->data; 5167 b = (Mat_SeqAIJ *)mpimat->B->data; 5168 ai = a->i; 5169 aj = a->j; 5170 bi = b->i; 5171 bj = b->j; 5172 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5173 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5174 aa = aav; 5175 ba = bav; 5176 if (scall == MAT_INITIAL_MATRIX) { 5177 PetscCall(PetscMalloc1(1 + am, &ci)); 5178 ci[0] = 0; 5179 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5180 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5181 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5182 k = 0; 5183 for (i = 0; i < am; i++) { 5184 ncols_o = bi[i + 1] - bi[i]; 5185 ncols_d = ai[i + 1] - ai[i]; 5186 /* off-diagonal portion of A */ 5187 for (jo = 0; jo < ncols_o; jo++) { 5188 col = cmap[*bj]; 5189 if (col >= cstart) break; 5190 cj[k] = col; 5191 bj++; 5192 ca[k++] = *ba++; 5193 } 5194 /* diagonal portion of A */ 5195 for (j = 0; j < ncols_d; j++) { 5196 cj[k] = cstart + *aj++; 5197 ca[k++] = 
*aa++; 5198 } 5199 /* off-diagonal portion of A */ 5200 for (j = jo; j < ncols_o; j++) { 5201 cj[k] = cmap[*bj++]; 5202 ca[k++] = *ba++; 5203 } 5204 } 5205 /* put together the new matrix */ 5206 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5207 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5208 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5209 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5210 mat->free_a = PETSC_TRUE; 5211 mat->free_ij = PETSC_TRUE; 5212 mat->nonew = 0; 5213 } else if (scall == MAT_REUSE_MATRIX) { 5214 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5215 ci = mat->i; 5216 cj = mat->j; 5217 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5218 for (i = 0; i < am; i++) { 5219 /* off-diagonal portion of A */ 5220 ncols_o = bi[i + 1] - bi[i]; 5221 for (jo = 0; jo < ncols_o; jo++) { 5222 col = cmap[*bj]; 5223 if (col >= cstart) break; 5224 *cam++ = *ba++; 5225 bj++; 5226 } 5227 /* diagonal portion of A */ 5228 ncols_d = ai[i + 1] - ai[i]; 5229 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5230 /* off-diagonal portion of A */ 5231 for (j = jo; j < ncols_o; j++) { 5232 *cam++ = *ba++; 5233 bj++; 5234 } 5235 } 5236 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5237 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5238 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5239 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5240 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5241 PetscFunctionReturn(PETSC_SUCCESS); 5242 } 5243 5244 /*@ 5245 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5246 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5247 5248 Not Collective 5249 5250 Input Parameters: 5251 + A - the matrix 5252 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5253 5254 Output Parameters: 5255 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5256 - A_loc - the local sequential matrix generated 5257 5258 Level: developer 5259 5260 Note: 5261 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5262 part, then those associated with the off-diagonal part (in its local ordering) 5263 5264 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5265 @*/ 5266 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5267 { 5268 Mat Ao, Ad; 5269 const PetscInt *cmap; 5270 PetscMPIInt size; 5271 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5272 5273 PetscFunctionBegin; 5274 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5275 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5276 if (size == 1) { 5277 if (scall == MAT_INITIAL_MATRIX) { 5278 PetscCall(PetscObjectReference((PetscObject)Ad)); 5279 *A_loc = Ad; 5280 } else if (scall == MAT_REUSE_MATRIX) { 5281 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5282 } 5283 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5284 PetscFunctionReturn(PETSC_SUCCESS); 5285 } 5286 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5287 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5288 if (f) { 5289 PetscCall((*f)(A, scall, glob, A_loc)); 5290 } else { 5291 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5292 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5293 Mat_SeqAIJ *c; 5294 PetscInt *ai = a->i, *aj = a->j; 5295 PetscInt *bi = b->i, *bj = b->j; 5296 PetscInt *ci, *cj; 5297 const PetscScalar *aa, *ba; 5298 PetscScalar *ca; 5299 PetscInt i, j, am, dn, on; 5300 5301 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5302 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5303 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5304 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5305 if (scall == MAT_INITIAL_MATRIX) { 5306 PetscInt k; 5307 PetscCall(PetscMalloc1(1 + am, &ci)); 5308 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5309 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5310 ci[0] = 0; 5311 for (i = 0, k = 0; i < am; i++) { 5312 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5313 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5314 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5315 /* diagonal portion of A */ 5316 for (j = 0; j < ncols_d; j++, k++) { 5317 cj[k] = *aj++; 5318 ca[k] = *aa++; 5319 } 5320 /* off-diagonal portion of A */ 5321 for (j = 0; j < ncols_o; j++, k++) { 5322 cj[k] = dn + *bj++; 5323 ca[k] = *ba++; 5324 } 5325 } 5326 /* put together the new matrix */ 5327 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5328 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5329 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/
      c          = (Mat_SeqAIJ *)(*A_loc)->data;
      c->free_a  = PETSC_TRUE;
      c->free_ij = PETSC_TRUE;
      c->nonew   = 0;
      PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
      for (i = 0; i < am; i++) {
        const PetscInt ncols_d = ai[i + 1] - ai[i];
        const PetscInt ncols_o = bi[i + 1] - bi[i];
        /* diagonal portion of A */
        for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
        /* off-diagonal portion of A */
        for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
      }
      PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
    PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
    PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
    if (glob) {
      PetscInt cst, *gidx;

      PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
      PetscCall(PetscMalloc1(dn + on, &gidx));
      for (i = 0; i < dn; i++) gidx[i] = cst + i;
      for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
    }
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns

  Not Collective

  Input Parameters:
+ A     - the matrix
. scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
. row   - index set of rows to extract (or `NULL`)
- col   - index set of columns to extract (or `NULL`)

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5425 } 5426 *A_loc = aloc[0]; 5427 PetscCall(PetscFree(aloc)); 5428 if (!row) PetscCall(ISDestroy(&isrowa)); 5429 if (!col) PetscCall(ISDestroy(&iscola)); 5430 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5431 PetscFunctionReturn(PETSC_SUCCESS); 5432 } 5433 5434 /* 5435 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5436 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5437 * on a global size. 5438 * */ 5439 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5440 { 5441 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5442 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5443 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5444 PetscMPIInt owner; 5445 PetscSFNode *iremote, *oiremote; 5446 const PetscInt *lrowindices; 5447 PetscSF sf, osf; 5448 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5449 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5450 MPI_Comm comm; 5451 ISLocalToGlobalMapping mapping; 5452 const PetscScalar *pd_a, *po_a; 5453 5454 PetscFunctionBegin; 5455 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5456 /* plocalsize is the number of roots 5457 * nrows is the number of leaves 5458 * */ 5459 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5460 PetscCall(ISGetLocalSize(rows, &nrows)); 5461 PetscCall(PetscCalloc1(nrows, &iremote)); 5462 PetscCall(ISGetIndices(rows, &lrowindices)); 5463 for (i = 0; i < nrows; i++) { 5464 /* Find a remote index and an owner for a row 5465 * The row could be local or remote 5466 * */ 5467 owner = 0; 5468 lidx = 0; 5469 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5470 iremote[i].index = lidx; 5471 iremote[i].rank = owner; 5472 } 5473 /* Create SF to communicate how many nonzero columns for each row */ 5474 PetscCall(PetscSFCreate(comm, &sf)); 5475 /* SF will figure out the number of nonzero columns for each row, and their 5476 * offsets 5477 * */ 5478 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5479 PetscCall(PetscSFSetFromOptions(sf)); 5480 PetscCall(PetscSFSetUp(sf)); 5481 5482 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5483 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5484 PetscCall(PetscCalloc1(nrows, &pnnz)); 5485 roffsets[0] = 0; 5486 roffsets[1] = 0; 5487 for (i = 0; i < plocalsize; i++) { 5488 /* diagonal */ 5489 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5490 /* off-diagonal */ 5491 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5492 /* compute offsets so that we relative location for each row */ 5493 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5494 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5495 } 5496 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5497 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5498 /* 'r' means root, and 'l' means leaf */ 5499 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5500 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5501 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5502 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5503 PetscCall(PetscSFDestroy(&sf)); 5504 
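  /* the per-row counts and offsets now live on the leaves (nlcols and loffsets), so the root-side copies can be freed */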
PetscCall(PetscFree(roffsets)); 5505 PetscCall(PetscFree(nrcols)); 5506 dntotalcols = 0; 5507 ontotalcols = 0; 5508 ncol = 0; 5509 for (i = 0; i < nrows; i++) { 5510 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5511 ncol = PetscMax(pnnz[i], ncol); 5512 /* diagonal */ 5513 dntotalcols += nlcols[i * 2 + 0]; 5514 /* off-diagonal */ 5515 ontotalcols += nlcols[i * 2 + 1]; 5516 } 5517 /* We do not need to figure the right number of columns 5518 * since all the calculations will be done by going through the raw data 5519 * */ 5520 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5521 PetscCall(MatSetUp(*P_oth)); 5522 PetscCall(PetscFree(pnnz)); 5523 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5524 /* diagonal */ 5525 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5526 /* off-diagonal */ 5527 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5528 /* diagonal */ 5529 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5530 /* off-diagonal */ 5531 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5532 dntotalcols = 0; 5533 ontotalcols = 0; 5534 ntotalcols = 0; 5535 for (i = 0; i < nrows; i++) { 5536 owner = 0; 5537 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5538 /* Set iremote for diag matrix */ 5539 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5540 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5541 iremote[dntotalcols].rank = owner; 5542 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5543 ilocal[dntotalcols++] = ntotalcols++; 5544 } 5545 /* off-diagonal */ 5546 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5547 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5548 oiremote[ontotalcols].rank = owner; 5549 oilocal[ontotalcols++] = ntotalcols++; 5550 } 5551 } 5552 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5553 PetscCall(PetscFree(loffsets)); 5554 PetscCall(PetscFree(nlcols)); 5555 PetscCall(PetscSFCreate(comm, &sf)); 5556 /* P serves as roots and P_oth is leaves 5557 * Diag matrix 5558 * */ 5559 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5560 PetscCall(PetscSFSetFromOptions(sf)); 5561 PetscCall(PetscSFSetUp(sf)); 5562 5563 PetscCall(PetscSFCreate(comm, &osf)); 5564 /* off-diagonal */ 5565 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5566 PetscCall(PetscSFSetFromOptions(osf)); 5567 PetscCall(PetscSFSetUp(osf)); 5568 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5569 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5570 /* operate on the matrix internal data to save memory */ 5571 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5572 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5573 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5574 /* Convert to global indices for diag matrix */ 5575 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5576 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5577 /* We want P_oth store global indices */ 5578 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5579 /* Use memory scalable approach */ 5580 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5581 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5582 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5583 
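  /* complete the diagonal-block column-index broadcast first so that pd->j can be shifted back to local indices below */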
PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5584 /* Convert back to local indices */ 5585 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5586 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5587 nout = 0; 5588 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5589 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5590 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5591 /* Exchange values */ 5592 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5593 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5594 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5595 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5596 /* Stop PETSc from shrinking memory */ 5597 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5598 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5599 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5600 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5601 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5602 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5603 PetscCall(PetscSFDestroy(&sf)); 5604 PetscCall(PetscSFDestroy(&osf)); 5605 PetscFunctionReturn(PETSC_SUCCESS); 5606 } 5607 5608 /* 5609 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5610 * This supports MPIAIJ and MAIJ 5611 * */ 5612 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5613 { 5614 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5615 Mat_SeqAIJ *p_oth; 5616 IS rows, map; 5617 PetscHMapI hamp; 5618 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5619 MPI_Comm comm; 5620 PetscSF sf, osf; 5621 PetscBool has; 5622 5623 PetscFunctionBegin; 5624 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5625 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5626 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5627 * and then create a submatrix (that often is an overlapping matrix) 5628 * */ 5629 if (reuse == MAT_INITIAL_MATRIX) { 5630 /* Use a hash table to figure out unique keys */ 5631 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5632 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5633 count = 0; 5634 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5635 for (i = 0; i < a->B->cmap->n; i++) { 5636 key = a->garray[i] / dof; 5637 PetscCall(PetscHMapIHas(hamp, key, &has)); 5638 if (!has) { 5639 mapping[i] = count; 5640 PetscCall(PetscHMapISet(hamp, key, count++)); 5641 } else { 5642 /* Current 'i' has the same value the previous step */ 5643 mapping[i] = count - 1; 5644 } 5645 } 5646 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5647 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5648 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5649 PetscCall(PetscCalloc1(htsize, &rowindices)); 5650 off = 0; 5651 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5652 PetscCall(PetscHMapIDestroy(&hamp)); 5653 PetscCall(PetscSortInt(htsize, rowindices)); 5654 PetscCall(ISCreateGeneral(comm, htsize, 
rowindices, PETSC_OWN_POINTER, &rows)); 5655 /* In case, the matrix was already created but users want to recreate the matrix */ 5656 PetscCall(MatDestroy(P_oth)); 5657 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5658 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5659 PetscCall(ISDestroy(&map)); 5660 PetscCall(ISDestroy(&rows)); 5661 } else if (reuse == MAT_REUSE_MATRIX) { 5662 /* If matrix was already created, we simply update values using SF objects 5663 * that as attached to the matrix earlier. 5664 */ 5665 const PetscScalar *pd_a, *po_a; 5666 5667 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5668 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5669 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5670 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5671 /* Update values in place */ 5672 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5673 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5674 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5675 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5676 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5677 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5678 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5679 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5680 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5681 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5682 PetscFunctionReturn(PETSC_SUCCESS); 5683 } 5684 5685 /*@C 5686 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5687 5688 Collective 5689 5690 Input Parameters: 5691 + A - the first matrix in `MATMPIAIJ` format 5692 . B - the second matrix in `MATMPIAIJ` format 5693 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5694 5695 Output Parameters: 5696 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5697 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5698 - B_seq - the sequential matrix generated 5699 5700 Level: developer 5701 5702 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5703 @*/ 5704 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5705 { 5706 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5707 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5708 IS isrowb, iscolb; 5709 Mat *bseq = NULL; 5710 5711 PetscFunctionBegin; 5712 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5713 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5714 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5715 5716 if (scall == MAT_INITIAL_MATRIX) { 5717 start = A->cmap->rstart; 5718 cmap = a->garray; 5719 nzA = a->A->cmap->n; 5720 nzB = a->B->cmap->n; 5721 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5722 ncols = 0; 5723 for (i = 0; i < nzB; i++) { /* row < local row index */ 5724 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5725 else break; 5726 } 5727 imark = i; 5728 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5729 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5730 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5731 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5732 } else { 5733 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5734 isrowb = *rowb; 5735 iscolb = *colb; 5736 PetscCall(PetscMalloc1(1, &bseq)); 5737 bseq[0] = *B_seq; 5738 } 5739 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5740 *B_seq = bseq[0]; 5741 PetscCall(PetscFree(bseq)); 5742 if (!rowb) { 5743 PetscCall(ISDestroy(&isrowb)); 5744 } else { 5745 *rowb = isrowb; 5746 } 5747 if (!colb) { 5748 PetscCall(ISDestroy(&iscolb)); 5749 } else { 5750 *colb = iscolb; 5751 } 5752 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5753 PetscFunctionReturn(PETSC_SUCCESS); 5754 } 5755 5756 /* 5757 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5758 of the OFF-DIAGONAL portion of local A 5759 5760 Collective 5761 5762 Input Parameters: 5763 + A,B - the matrices in `MATMPIAIJ` format 5764 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5765 5766 Output Parameter: 5767 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5768 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5769 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5770 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5771 5772 Developer Note: 5773 This directly accesses information inside the VecScatter associated with the matrix-vector product 5774 for this matrix. This is not desirable.. 
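
   A typical call sequence (a sketch, not taken from existing PETSc code; the caller owns startsj_s, startsj_r, bufa and
   B_oth and eventually frees them, as is done for example in MatDestroy_MatMatMPIAIJBACKEND() later in this file):

     PetscInt   *startsj_s = NULL, *startsj_r = NULL;
     MatScalar  *bufa      = NULL;
     Mat         B_oth;

     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
     ... the numerical values of B change, but its nonzero pattern does not ...
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));

   With MAT_REUSE_MATRIX only the a-array (numerical values) is re-communicated; startsj_s, startsj_r and bufa_ptr cache
   the message layout that was computed in the MAT_INITIAL_MATRIX call.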
5775 5776 Level: developer 5777 5778 */ 5779 5780 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5781 { 5782 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5783 VecScatter ctx; 5784 MPI_Comm comm; 5785 const PetscMPIInt *rprocs, *sprocs; 5786 PetscMPIInt nrecvs, nsends; 5787 const PetscInt *srow, *rstarts, *sstarts; 5788 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5789 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5790 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5791 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5792 PetscMPIInt size, tag, rank, nreqs; 5793 5794 PetscFunctionBegin; 5795 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5796 PetscCallMPI(MPI_Comm_size(comm, &size)); 5797 5798 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5799 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5800 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5801 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5802 5803 if (size == 1) { 5804 startsj_s = NULL; 5805 bufa_ptr = NULL; 5806 *B_oth = NULL; 5807 PetscFunctionReturn(PETSC_SUCCESS); 5808 } 5809 5810 ctx = a->Mvctx; 5811 tag = ((PetscObject)ctx)->tag; 5812 5813 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5814 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5815 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5816 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5817 PetscCall(PetscMalloc1(nreqs, &reqs)); 5818 rwaits = reqs; 5819 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5820 5821 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5822 if (scall == MAT_INITIAL_MATRIX) { 5823 /* i-array */ 5824 /* post receives */ 5825 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5826 for (i = 0; i < nrecvs; i++) { 5827 rowlen = rvalues + rstarts[i] * rbs; 5828 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5829 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5830 } 5831 5832 /* pack the outgoing message */ 5833 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5834 5835 sstartsj[0] = 0; 5836 rstartsj[0] = 0; 5837 len = 0; /* total length of j or a array to be sent */ 5838 if (nsends) { 5839 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5840 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5841 } 5842 for (i = 0; i < nsends; i++) { 5843 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5844 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5845 for (j = 0; j < nrows; j++) { 5846 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5847 for (l = 0; l < sbs; l++) { 5848 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5849 5850 rowlen[j * sbs + l] = ncols; 5851 5852 len += ncols; 5853 
PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5854 } 5855 k++; 5856 } 5857 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5858 5859 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5860 } 5861 /* recvs and sends of i-array are completed */ 5862 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5863 PetscCall(PetscFree(svalues)); 5864 5865 /* allocate buffers for sending j and a arrays */ 5866 PetscCall(PetscMalloc1(len + 1, &bufj)); 5867 PetscCall(PetscMalloc1(len + 1, &bufa)); 5868 5869 /* create i-array of B_oth */ 5870 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5871 5872 b_othi[0] = 0; 5873 len = 0; /* total length of j or a array to be received */ 5874 k = 0; 5875 for (i = 0; i < nrecvs; i++) { 5876 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5877 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5878 for (j = 0; j < nrows; j++) { 5879 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5880 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5881 k++; 5882 } 5883 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5884 } 5885 PetscCall(PetscFree(rvalues)); 5886 5887 /* allocate space for j and a arrays of B_oth */ 5888 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5889 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5890 5891 /* j-array */ 5892 /* post receives of j-array */ 5893 for (i = 0; i < nrecvs; i++) { 5894 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5895 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5896 } 5897 5898 /* pack the outgoing message j-array */ 5899 if (nsends) k = sstarts[0]; 5900 for (i = 0; i < nsends; i++) { 5901 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5902 bufJ = bufj + sstartsj[i]; 5903 for (j = 0; j < nrows; j++) { 5904 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5905 for (ll = 0; ll < sbs; ll++) { 5906 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5907 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5908 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5909 } 5910 } 5911 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5912 } 5913 5914 /* recvs and sends of j-array are completed */ 5915 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5916 } else if (scall == MAT_REUSE_MATRIX) { 5917 sstartsj = *startsj_s; 5918 rstartsj = *startsj_r; 5919 bufa = *bufa_ptr; 5920 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5921 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5922 5923 /* a-array */ 5924 /* post receives of a-array */ 5925 for (i = 0; i < nrecvs; i++) { 5926 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5927 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5928 } 5929 5930 /* pack the outgoing message a-array */ 5931 if (nsends) k = sstarts[0]; 5932 for (i = 0; i < nsends; i++) { 5933 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5934 bufA = bufa + sstartsj[i]; 5935 for (j = 0; j < nrows; j++) { 5936 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5937 for (ll = 0; ll < sbs; ll++) { 5938 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5939 for (l = 
0; l < ncols; l++) *bufA++ = vals[l]; 5940 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5941 } 5942 } 5943 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5944 } 5945 /* recvs and sends of a-array are completed */ 5946 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5947 PetscCall(PetscFree(reqs)); 5948 5949 if (scall == MAT_INITIAL_MATRIX) { 5950 Mat_SeqAIJ *b_oth; 5951 5952 /* put together the new matrix */ 5953 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5954 5955 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5956 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5957 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5958 b_oth->free_a = PETSC_TRUE; 5959 b_oth->free_ij = PETSC_TRUE; 5960 b_oth->nonew = 0; 5961 5962 PetscCall(PetscFree(bufj)); 5963 if (!startsj_s || !bufa_ptr) { 5964 PetscCall(PetscFree2(sstartsj, rstartsj)); 5965 PetscCall(PetscFree(bufa_ptr)); 5966 } else { 5967 *startsj_s = sstartsj; 5968 *startsj_r = rstartsj; 5969 *bufa_ptr = bufa; 5970 } 5971 } else if (scall == MAT_REUSE_MATRIX) { 5972 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5973 } 5974 5975 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5976 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5977 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5978 PetscFunctionReturn(PETSC_SUCCESS); 5979 } 5980 5981 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5982 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5984 #if defined(PETSC_HAVE_MKL_SPARSE) 5985 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5986 #endif 5987 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5988 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5989 #if defined(PETSC_HAVE_ELEMENTAL) 5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5991 #endif 5992 #if defined(PETSC_HAVE_SCALAPACK) 5993 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5994 #endif 5995 #if defined(PETSC_HAVE_HYPRE) 5996 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 5997 #endif 5998 #if defined(PETSC_HAVE_CUDA) 5999 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6000 #endif 6001 #if defined(PETSC_HAVE_HIP) 6002 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6003 #endif 6004 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6005 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6006 #endif 6007 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6008 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6009 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6010 6011 /* 6012 Computes (B'*A')' since computing B*A directly is untenable 6013 6014 n p p 6015 [ ] [ ] [ ] 6016 m [ A ] * n [ B ] = m [ C ] 6017 [ ] [ ] [ ] 6018 
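
   That is, with At = A^T and Bt = B^T formed explicitly, the identity C = A*B = (B^T * A^T)^T is used:
   the code below computes Ct = Bt*At with MatMatMult() and then obtains C by transposing Ct.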
6019 */ 6020 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6021 { 6022 Mat At, Bt, Ct; 6023 6024 PetscFunctionBegin; 6025 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6026 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6027 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6028 PetscCall(MatDestroy(&At)); 6029 PetscCall(MatDestroy(&Bt)); 6030 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6031 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6032 PetscCall(MatDestroy(&Ct)); 6033 PetscFunctionReturn(PETSC_SUCCESS); 6034 } 6035 6036 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6037 { 6038 PetscBool cisdense; 6039 6040 PetscFunctionBegin; 6041 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6042 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6043 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6044 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6045 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6046 PetscCall(MatSetUp(C)); 6047 6048 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6049 PetscFunctionReturn(PETSC_SUCCESS); 6050 } 6051 6052 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6053 { 6054 Mat_Product *product = C->product; 6055 Mat A = product->A, B = product->B; 6056 6057 PetscFunctionBegin; 6058 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6059 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6060 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6061 C->ops->productsymbolic = MatProductSymbolic_AB; 6062 PetscFunctionReturn(PETSC_SUCCESS); 6063 } 6064 6065 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6066 { 6067 Mat_Product *product = C->product; 6068 6069 PetscFunctionBegin; 6070 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6071 PetscFunctionReturn(PETSC_SUCCESS); 6072 } 6073 6074 /* 6075 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6076 6077 Input Parameters: 6078 6079 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6080 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6081 6082 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6083 6084 For Set1, j1[] contains column indices of the nonzeros. 6085 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6086 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6087 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6088 6089 Similar for Set2. 6090 6091 This routine merges the two sets of nonzeros row by row and removes repeats. 6092 6093 Output Parameters: (memory is allocated by the caller) 6094 6095 i[],j[]: the CSR of the merged matrix, which has m rows. 6096 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) 
            corresponds to imap1[k]-th unique nonzero in the merged matrix.
  imap2[]: similar to imap1[], but for Set2.
  Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining entries in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    PetscCall(PetscIntCast(t, i + r + 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block.
Length of Aperm[] is Atot, though it may also count 6179 repeats (i.e., same 'i,j' pair). 6180 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6181 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6182 6183 Atot: number of entries belonging to the diagonal block 6184 Annz: number of unique nonzeros belonging to the diagonal block. 6185 6186 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6187 6188 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6189 */ 6190 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6191 { 6192 PetscInt cstart, cend, rstart, rend, row, col; 6193 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6194 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6195 PetscCount k, m, p, q, r, s, mid; 6196 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6197 6198 PetscFunctionBegin; 6199 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6200 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6201 m = rend - rstart; 6202 6203 /* Skip negative rows */ 6204 for (k = 0; k < n; k++) 6205 if (i[k] >= 0) break; 6206 6207 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6208 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6209 */ 6210 while (k < n) { 6211 row = i[k]; 6212 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6213 for (s = k; s < n; s++) 6214 if (i[s] != row) break; 6215 6216 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6217 for (p = k; p < s; p++) { 6218 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6219 } 6220 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6221 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6222 rowBegin[row - rstart] = k; 6223 rowMid[row - rstart] = mid; 6224 rowEnd[row - rstart] = s; 6225 PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N); 6226 6227 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6228 Atot += mid - k; 6229 Btot += s - mid; 6230 6231 /* Count unique nonzeros of this diag row */ 6232 for (p = k; p < mid;) { 6233 col = j[p]; 6234 do { 6235 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6236 p++; 6237 } while (p < mid && j[p] == col); 6238 Annz++; 6239 } 6240 6241 /* Count unique nonzeros of this offdiag row */ 6242 for (p = mid; p < s;) { 6243 col = j[p]; 6244 do { 6245 p++; 6246 } while (p < s && j[p] == col); 6247 Bnnz++; 6248 } 6249 k = s; 6250 } 6251 6252 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6253 PetscCall(PetscMalloc1(Atot, &Aperm)); 6254 PetscCall(PetscMalloc1(Btot, &Bperm)); 6255 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6256 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6257 6258 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6259 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6260 for (r = 0; r < m; r++) { 6261 k = rowBegin[r]; 6262 mid = rowMid[r]; 6263 s = rowEnd[r]; 6264 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6265 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6266 Atot += mid - k; 6267 Btot += s - mid; 6268 6269 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6270 for (p = k; p < mid;) { 6271 col = j[p]; 6272 q = p; 6273 do { 6274 p++; 6275 } while (p < mid && j[p] == col); 6276 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6277 Annz++; 6278 } 6279 6280 for (p = mid; p < s;) { 6281 col = j[p]; 6282 q = p; 6283 do { 6284 p++; 6285 } while (p < s && j[p] == col); 6286 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6287 Bnnz++; 6288 } 6289 } 6290 /* Output */ 6291 *Aperm_ = Aperm; 6292 *Annz_ = Annz; 6293 *Atot_ = Atot; 6294 *Ajmap_ = Ajmap; 6295 *Bperm_ = Bperm; 6296 *Bnnz_ = Bnnz; 6297 *Btot_ = Btot; 6298 *Bjmap_ = Bjmap; 6299 PetscFunctionReturn(PETSC_SUCCESS); 6300 } 6301 6302 /* 6303 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6304 6305 Input Parameters: 6306 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6307 nnz: number of unique nonzeros in the merged matrix 6308 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6309 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6310 6311 Output Parameter: (memory is allocated by the caller) 6312 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6313 6314 Example: 6315 nnz1 = 4 6316 nnz 
= 6 6317 imap = [1,3,4,5] 6318 jmap = [0,3,5,6,7] 6319 then, 6320 jmap_new = [0,0,3,3,5,6,7] 6321 */ 6322 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6323 { 6324 PetscCount k, p; 6325 6326 PetscFunctionBegin; 6327 jmap_new[0] = 0; 6328 p = nnz; /* p loops over jmap_new[] backwards */ 6329 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6330 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6331 } 6332 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6333 PetscFunctionReturn(PETSC_SUCCESS); 6334 } 6335 6336 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6337 { 6338 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6339 6340 PetscFunctionBegin; 6341 PetscCall(PetscSFDestroy(&coo->sf)); 6342 PetscCall(PetscFree(coo->Aperm1)); 6343 PetscCall(PetscFree(coo->Bperm1)); 6344 PetscCall(PetscFree(coo->Ajmap1)); 6345 PetscCall(PetscFree(coo->Bjmap1)); 6346 PetscCall(PetscFree(coo->Aimap2)); 6347 PetscCall(PetscFree(coo->Bimap2)); 6348 PetscCall(PetscFree(coo->Aperm2)); 6349 PetscCall(PetscFree(coo->Bperm2)); 6350 PetscCall(PetscFree(coo->Ajmap2)); 6351 PetscCall(PetscFree(coo->Bjmap2)); 6352 PetscCall(PetscFree(coo->Cperm1)); 6353 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6354 PetscCall(PetscFree(coo)); 6355 PetscFunctionReturn(PETSC_SUCCESS); 6356 } 6357 6358 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6359 { 6360 MPI_Comm comm; 6361 PetscMPIInt rank, size; 6362 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6363 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6364 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6365 PetscContainer container; 6366 MatCOOStruct_MPIAIJ *coo; 6367 6368 PetscFunctionBegin; 6369 PetscCall(PetscFree(mpiaij->garray)); 6370 PetscCall(VecDestroy(&mpiaij->lvec)); 6371 #if defined(PETSC_USE_CTABLE) 6372 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6373 #else 6374 PetscCall(PetscFree(mpiaij->colmap)); 6375 #endif 6376 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6377 mat->assembled = PETSC_FALSE; 6378 mat->was_assembled = PETSC_FALSE; 6379 6380 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6381 PetscCallMPI(MPI_Comm_size(comm, &size)); 6382 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6383 PetscCall(PetscLayoutSetUp(mat->rmap)); 6384 PetscCall(PetscLayoutSetUp(mat->cmap)); 6385 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6386 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6387 PetscCall(MatGetLocalSize(mat, &m, &n)); 6388 PetscCall(MatGetSize(mat, &M, &N)); 6389 6390 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6391 /* entries come first, then local rows, then remote rows. */ 6392 PetscCount n1 = coo_n, *perm1; 6393 PetscInt *i1 = coo_i, *j1 = coo_j; 6394 6395 PetscCall(PetscMalloc1(n1, &perm1)); 6396 for (k = 0; k < n1; k++) perm1[k] = k; 6397 6398 /* Manipulate indices so that entries with negative row or col indices will have smallest 6399 row indices, local entries will have greater but negative row indices, and remote entries 6400 will have positive row indices. 
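     For example (illustrative only): an entry flagged to be ignored gets row index PETSC_INT_MIN and sorts first;
     a local row r in [rstart, rend) becomes r - PETSC_INT_MAX, which is negative but still greater than PETSC_INT_MIN,
     so it sorts after the ignored entries; a remote row keeps its nonnegative index and sorts last. A single sort by
     row thus yields the layout [ignored | local | remote], and the local row indices are restored further below by
     adding PETSC_INT_MAX back.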
6401 */ 6402 for (k = 0; k < n1; k++) { 6403 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6404 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6405 else { 6406 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6407 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6408 } 6409 } 6410 6411 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6412 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6413 6414 /* Advance k to the first entry we need to take care of */ 6415 for (k = 0; k < n1; k++) 6416 if (i1[k] > PETSC_INT_MIN) break; 6417 PetscCount i1start = k; 6418 6419 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6420 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6421 6422 PetscCheck(i1 == NULL || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M); 6423 6424 /* Send remote rows to their owner */ 6425 /* Find which rows should be sent to which remote ranks*/ 6426 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6427 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6428 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6429 const PetscInt *ranges; 6430 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6431 6432 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6433 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6434 for (k = rem; k < n1;) { 6435 PetscMPIInt owner; 6436 PetscInt firstRow, lastRow; 6437 6438 /* Locate a row range */ 6439 firstRow = i1[k]; /* first row of this owner */ 6440 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6441 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6442 6443 /* Find the first index 'p' in [k,n) with i1[p] belonging to next owner */ 6444 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6445 6446 /* All entries in [k,p) belong to this remote owner */ 6447 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6448 PetscMPIInt *sendto2; 6449 PetscInt *nentries2; 6450 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size;

      PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
      PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
      PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); /* copy the old counts, not nentries2 onto itself */
      PetscCall(PetscFree2(sendto, nentries));                 /* free the old pair obtained from PetscMalloc2() */
      sendto   = sendto2;
      nentries = nentries2;
      maxNsend = maxNsend2;
    }
    sendto[nsend] = owner;
    PetscCall(PetscIntCast(p - k, &nentries[nsend]));
    nsend++;
    k = p;
  }

  /* Build 1st SF to know offsets on remote to send data */
  PetscSF      sf1;
  PetscInt     nroots = 1, nroots2 = 0;
  PetscInt     nleaves = nsend, nleaves2 = 0;
  PetscInt    *offsets;
  PetscSFNode *iremote;

  PetscCall(PetscSFCreate(comm, &sf1));
  PetscCall(PetscMalloc1(nsend, &iremote));
  PetscCall(PetscMalloc1(nsend, &offsets));
  for (k = 0; k < nsend; k++) {
    iremote[k].rank  = sendto[k];
    iremote[k].index = 0;
    nleaves2 += nentries[k];
    PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
  }
  PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */
  PetscCall(PetscSFDestroy(&sf1));
  PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);

  /* Build 2nd SF to send remote COOs to their owner */
  PetscSF sf2;
  nroots  = nroots2;
  nleaves = nleaves2;
  PetscCall(PetscSFCreate(comm, &sf2));
  PetscCall(PetscSFSetFromOptions(sf2));
  PetscCall(PetscMalloc1(nleaves, &iremote));
  p = 0;
  for (k = 0; k < nsend; k++) {
    PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
    for (q = 0; q < nentries[k]; q++, p++) {
      iremote[p].rank = sendto[k];
      PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
    }
  }
  PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));

  /* Send the remote COOs to their owner */
  PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
  PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
  PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
  PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
  PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
  PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
  PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2,
MPIU_INT, j1prem, j2, MPI_REPLACE)); 6517 6518 PetscCall(PetscFree(offsets)); 6519 PetscCall(PetscFree2(sendto, nentries)); 6520 6521 /* Sort received COOs by row along with the permutation array */ 6522 for (k = 0; k < n2; k++) perm2[k] = k; 6523 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6524 6525 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6526 PetscCount *Cperm1; 6527 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6528 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6529 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6530 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6531 6532 /* Support for HYPRE matrices, kind of a hack. 6533 Swap min column with diagonal so that diagonal values will go first */ 6534 PetscBool hypre; 6535 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6536 if (hypre) { 6537 PetscInt *minj; 6538 PetscBT hasdiag; 6539 6540 PetscCall(PetscBTCreate(m, &hasdiag)); 6541 PetscCall(PetscMalloc1(m, &minj)); 6542 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6543 for (k = i1start; k < rem; k++) { 6544 if (j1[k] < cstart || j1[k] >= cend) continue; 6545 const PetscInt rindex = i1[k] - rstart; 6546 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6547 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6548 } 6549 for (k = 0; k < n2; k++) { 6550 if (j2[k] < cstart || j2[k] >= cend) continue; 6551 const PetscInt rindex = i2[k] - rstart; 6552 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6553 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6554 } 6555 for (k = i1start; k < rem; k++) { 6556 const PetscInt rindex = i1[k] - rstart; 6557 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6558 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6559 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6560 } 6561 for (k = 0; k < n2; k++) { 6562 const PetscInt rindex = i2[k] - rstart; 6563 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6564 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6565 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6566 } 6567 PetscCall(PetscBTDestroy(&hasdiag)); 6568 PetscCall(PetscFree(minj)); 6569 } 6570 6571 /* Split local COOs and received COOs into diag/offdiag portions */ 6572 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6573 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6574 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6575 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6576 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6577 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6578 6579 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6580 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6581 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6582 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6583 6584 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6585 PetscInt *Ai, *Bi; 6586 PetscInt *Aj, *Bj; 6587 6588 PetscCall(PetscMalloc1(m + 1, &Ai)); 6589 PetscCall(PetscMalloc1(m + 1, &Bi)); 6590 PetscCall(PetscMalloc1(Annz1 + 
Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6591 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6592 6593 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6594 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6595 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6596 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6597 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6598 6599 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6600 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6601 6602 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6603 /* expect nonzeros in A/B most likely have local contributing entries */ 6604 PetscInt Annz = Ai[m]; 6605 PetscInt Bnnz = Bi[m]; 6606 PetscCount *Ajmap1_new, *Bjmap1_new; 6607 6608 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6609 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6610 6611 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6612 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6613 6614 PetscCall(PetscFree(Aimap1)); 6615 PetscCall(PetscFree(Ajmap1)); 6616 PetscCall(PetscFree(Bimap1)); 6617 PetscCall(PetscFree(Bjmap1)); 6618 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6619 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6620 PetscCall(PetscFree(perm1)); 6621 PetscCall(PetscFree3(i2, j2, perm2)); 6622 6623 Ajmap1 = Ajmap1_new; 6624 Bjmap1 = Bjmap1_new; 6625 6626 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6627 if (Annz < Annz1 + Annz2) { 6628 PetscInt *Aj_new; 6629 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6630 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6631 PetscCall(PetscFree(Aj)); 6632 Aj = Aj_new; 6633 } 6634 6635 if (Bnnz < Bnnz1 + Bnnz2) { 6636 PetscInt *Bj_new; 6637 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6638 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6639 PetscCall(PetscFree(Bj)); 6640 Bj = Bj_new; 6641 } 6642 6643 /* Create new submatrices for on-process and off-process coupling */ 6644 PetscScalar *Aa, *Ba; 6645 MatType rtype; 6646 Mat_SeqAIJ *a, *b; 6647 PetscObjectState state; 6648 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6649 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6650 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6651 if (cstart) { 6652 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6653 } 6654 6655 PetscCall(MatGetRootType_Private(mat, &rtype)); 6656 6657 MatSeqXAIJGetOptions_Private(mpiaij->A); 6658 PetscCall(MatDestroy(&mpiaij->A)); 6659 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6660 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6661 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6662 6663 MatSeqXAIJGetOptions_Private(mpiaij->B); 6664 PetscCall(MatDestroy(&mpiaij->B)); 6665 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6666 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6667 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6668 6669 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6670 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6671 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6672 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, 
PetscObjectComm((PetscObject)mat))); 6673 6674 a = (Mat_SeqAIJ *)mpiaij->A->data; 6675 b = (Mat_SeqAIJ *)mpiaij->B->data; 6676 a->free_a = PETSC_TRUE; 6677 a->free_ij = PETSC_TRUE; 6678 b->free_a = PETSC_TRUE; 6679 b->free_ij = PETSC_TRUE; 6680 a->maxnz = a->nz; 6681 b->maxnz = b->nz; 6682 6683 /* conversion must happen AFTER multiply setup */ 6684 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6685 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6686 PetscCall(VecDestroy(&mpiaij->lvec)); 6687 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6688 6689 // Put the COO struct in a container and then attach that to the matrix 6690 PetscCall(PetscMalloc1(1, &coo)); 6691 coo->n = coo_n; 6692 coo->sf = sf2; 6693 coo->sendlen = nleaves; 6694 coo->recvlen = nroots; 6695 coo->Annz = Annz; 6696 coo->Bnnz = Bnnz; 6697 coo->Annz2 = Annz2; 6698 coo->Bnnz2 = Bnnz2; 6699 coo->Atot1 = Atot1; 6700 coo->Atot2 = Atot2; 6701 coo->Btot1 = Btot1; 6702 coo->Btot2 = Btot2; 6703 coo->Ajmap1 = Ajmap1; 6704 coo->Aperm1 = Aperm1; 6705 coo->Bjmap1 = Bjmap1; 6706 coo->Bperm1 = Bperm1; 6707 coo->Aimap2 = Aimap2; 6708 coo->Ajmap2 = Ajmap2; 6709 coo->Aperm2 = Aperm2; 6710 coo->Bimap2 = Bimap2; 6711 coo->Bjmap2 = Bjmap2; 6712 coo->Bperm2 = Bperm2; 6713 coo->Cperm1 = Cperm1; 6714 // Allocate in preallocation. If not used, it has zero cost on host 6715 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6716 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6717 PetscCall(PetscContainerSetPointer(container, coo)); 6718 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6719 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6720 PetscCall(PetscContainerDestroy(&container)); 6721 PetscFunctionReturn(PETSC_SUCCESS); 6722 } 6723 6724 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6725 { 6726 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6727 Mat A = mpiaij->A, B = mpiaij->B; 6728 PetscScalar *Aa, *Ba; 6729 PetscScalar *sendbuf, *recvbuf; 6730 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6731 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6732 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6733 const PetscCount *Cperm1; 6734 PetscContainer container; 6735 MatCOOStruct_MPIAIJ *coo; 6736 6737 PetscFunctionBegin; 6738 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6739 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6740 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6741 sendbuf = coo->sendbuf; 6742 recvbuf = coo->recvbuf; 6743 Ajmap1 = coo->Ajmap1; 6744 Ajmap2 = coo->Ajmap2; 6745 Aimap2 = coo->Aimap2; 6746 Bjmap1 = coo->Bjmap1; 6747 Bjmap2 = coo->Bjmap2; 6748 Bimap2 = coo->Bimap2; 6749 Aperm1 = coo->Aperm1; 6750 Aperm2 = coo->Aperm2; 6751 Bperm1 = coo->Bperm1; 6752 Bperm2 = coo->Bperm2; 6753 Cperm1 = coo->Cperm1; 6754 6755 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6756 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6757 6758 /* Pack entries to be sent to remote */ 6759 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6760 6761 /* Send remote entries to their owner and overlap the communication with local computation */ 6762 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, 
PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6763 /* Add local entries to A and B */ 6764 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6765 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6766 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6767 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6768 } 6769 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6770 PetscScalar sum = 0.0; 6771 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6772 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6773 } 6774 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6775 6776 /* Add received remote entries to A and B */ 6777 for (PetscCount i = 0; i < coo->Annz2; i++) { 6778 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6779 } 6780 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6781 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6782 } 6783 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6784 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6785 PetscFunctionReturn(PETSC_SUCCESS); 6786 } 6787 6788 /*MC 6789 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6790 6791 Options Database Keys: 6792 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6793 6794 Level: beginner 6795 6796 Notes: 6797 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6798 in this case the values associated with the rows and columns one passes in are set to zero 6799 in the matrix 6800 6801 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 6802 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6803 6804 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6805 M*/ 6806 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6807 { 6808 Mat_MPIAIJ *b; 6809 PetscMPIInt size; 6810 6811 PetscFunctionBegin; 6812 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6813 6814 PetscCall(PetscNew(&b)); 6815 B->data = (void *)b; 6816 B->ops[0] = MatOps_Values; 6817 B->assembled = PETSC_FALSE; 6818 B->insertmode = NOT_SET_VALUES; 6819 b->size = size; 6820 6821 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6822 6823 /* build cache for off array entries formed */ 6824 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6825 6826 b->donotstash = PETSC_FALSE; 6827 b->colmap = NULL; 6828 b->garray = NULL; 6829 b->roworiented = PETSC_TRUE; 6830 6831 /* stuff used for matrix vector multiply */ 6832 b->lvec = NULL; 6833 b->Mvctx = NULL; 6834 6835 /* stuff for MatGetRow() */ 6836 b->rowindices = NULL; 6837 b->rowvalues = NULL; 6838 b->getrowactive = PETSC_FALSE; 6839 6840 /* flexible pointer used in CUSPARSE classes */ 6841 b->spptr = NULL; 6842 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6844 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6845 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6847 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6848 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6849 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6850 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6851 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6852 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6853 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6854 #if defined(PETSC_HAVE_CUDA) 6855 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6856 #endif 6857 #if defined(PETSC_HAVE_HIP) 6858 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6859 #endif 6860 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6861 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6862 #endif 6863 #if defined(PETSC_HAVE_MKL_SPARSE) 6864 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6865 #endif 6866 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6867 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6868 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6869 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6870 #if defined(PETSC_HAVE_ELEMENTAL) 6871 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6872 #endif 6873 #if defined(PETSC_HAVE_SCALAPACK) 6874 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6875 #endif 6876 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6877 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6878 #if defined(PETSC_HAVE_HYPRE) 6879 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6880 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6881 #endif 6882 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6883 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6884 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6885 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6886 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6887 PetscFunctionReturn(PETSC_SUCCESS); 6888 } 6889 6890 /*@ 6891 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6892 and "off-diagonal" part of the matrix in CSR format. 6893 6894 Collective 6895 6896 Input Parameters: 6897 + comm - MPI communicator 6898 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6899 . n - This value should be the same as the local size used in creating the 6900 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6901 calculated if `N` is given) For square matrices `n` is almost always `m`. 6902 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6903 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6904 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6905 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6906 . a - matrix values 6907 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6908 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6909 - oa - matrix values 6910 6911 Output Parameter: 6912 . mat - the matrix 6913 6914 Level: advanced 6915 6916 Notes: 6917 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6918 must free the arrays once the matrix has been destroyed and not before. 
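
   As an illustration (a constructed example, not from the PETSc manual), consider a 4 x 4 matrix distributed over two
   MPI processes, each owning two rows and two columns,
.vb
        1  2  |  0  3
        0  4  |  5  0
        -------------
        6  0  |  7  0
        0  8  |  0  9
.ve
   Process 0 passes the "diagonal" block in CSR form with local column indices and the "off-diagonal" block with global
   column indices,
.vb
   Mat         mat;
   PetscInt    i[]  = {0, 2, 3}, j[]  = {0, 1, 1};
   PetscScalar a[]  = {1.0, 2.0, 4.0};
   PetscInt    oi[] = {0, 1, 2}, oj[] = {3, 2};
   PetscScalar oa[] = {3.0, 5.0};

   PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, 4, 4, i, j, a, oi, oj, oa, &mat));
.ve
   while process 1 passes the analogous arrays for the last two rows.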
6919 6920 The `i` and `j` indices are 0 based 6921 6922 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6923 6924 This sets local rows and cannot be used to set off-processor values. 6925 6926 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6927 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6928 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6929 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6930 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6931 communication if it is known that only local entries will be set. 6932 6933 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6934 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6935 @*/ 6936 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6937 { 6938 Mat_MPIAIJ *maij; 6939 6940 PetscFunctionBegin; 6941 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6942 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6943 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6944 PetscCall(MatCreate(comm, mat)); 6945 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6946 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6947 maij = (Mat_MPIAIJ *)(*mat)->data; 6948 6949 (*mat)->preallocated = PETSC_TRUE; 6950 6951 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6952 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6953 6954 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6955 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6956 6957 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6958 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6959 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6960 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6961 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6962 PetscFunctionReturn(PETSC_SUCCESS); 6963 } 6964 6965 typedef struct { 6966 Mat *mp; /* intermediate products */ 6967 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6968 PetscInt cp; /* number of intermediate products */ 6969 6970 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6971 PetscInt *startsj_s, *startsj_r; 6972 PetscScalar *bufa; 6973 Mat P_oth; 6974 6975 /* may take advantage of merging product->B */ 6976 Mat Bloc; /* B-local by merging diag and off-diag */ 6977 6978 /* cusparse does not have support to split between symbolic and numeric phases. 
6979 When api_user is true, we don't need to update the numerical values 6980 of the temporary storage */ 6981 PetscBool reusesym; 6982 6983 /* support for COO values insertion */ 6984 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6985 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6986 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6987 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6988 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6989 PetscMemType mtype; 6990 6991 /* customization */ 6992 PetscBool abmerge; 6993 PetscBool P_oth_bind; 6994 } MatMatMPIAIJBACKEND; 6995 6996 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6997 { 6998 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6999 PetscInt i; 7000 7001 PetscFunctionBegin; 7002 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7003 PetscCall(PetscFree(mmdata->bufa)); 7004 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7005 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7006 PetscCall(MatDestroy(&mmdata->P_oth)); 7007 PetscCall(MatDestroy(&mmdata->Bloc)); 7008 PetscCall(PetscSFDestroy(&mmdata->sf)); 7009 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7010 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7011 PetscCall(PetscFree(mmdata->own[0])); 7012 PetscCall(PetscFree(mmdata->own)); 7013 PetscCall(PetscFree(mmdata->off[0])); 7014 PetscCall(PetscFree(mmdata->off)); 7015 PetscCall(PetscFree(mmdata)); 7016 PetscFunctionReturn(PETSC_SUCCESS); 7017 } 7018 7019 /* Copy selected n entries with indices in idx[] of A to v[]. 
7020 If idx is NULL, copy the whole data array of A to v[] 7021 */ 7022 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7023 { 7024 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7025 7026 PetscFunctionBegin; 7027 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7028 if (f) { 7029 PetscCall((*f)(A, n, idx, v)); 7030 } else { 7031 const PetscScalar *vv; 7032 7033 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7034 if (n && idx) { 7035 PetscScalar *w = v; 7036 const PetscInt *oi = idx; 7037 PetscInt j; 7038 7039 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7040 } else { 7041 PetscCall(PetscArraycpy(v, vv, n)); 7042 } 7043 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7044 } 7045 PetscFunctionReturn(PETSC_SUCCESS); 7046 } 7047 7048 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7049 { 7050 MatMatMPIAIJBACKEND *mmdata; 7051 PetscInt i, n_d, n_o; 7052 7053 PetscFunctionBegin; 7054 MatCheckProduct(C, 1); 7055 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7056 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7057 if (!mmdata->reusesym) { /* update temporary matrices */ 7058 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7059 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7060 } 7061 mmdata->reusesym = PETSC_FALSE; 7062 7063 for (i = 0; i < mmdata->cp; i++) { 7064 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7065 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7066 } 7067 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7068 PetscInt noff; 7069 7070 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7071 if (mmdata->mptmp[i]) continue; 7072 if (noff) { 7073 PetscInt nown; 7074 7075 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7076 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7077 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7078 n_o += noff; 7079 n_d += nown; 7080 } else { 7081 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7082 7083 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7084 n_d += mm->nz; 7085 } 7086 } 7087 if (mmdata->hasoffproc) { /* offprocess insertion */ 7088 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7089 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7090 } 7091 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7092 PetscFunctionReturn(PETSC_SUCCESS); 7093 } 7094 7095 /* Support for Pt * A, A * P, or Pt * A * P */ 7096 #define MAX_NUMBER_INTERMEDIATE 4 7097 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7098 { 7099 Mat_Product *product = C->product; 7100 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7101 Mat_MPIAIJ *a, *p; 7102 MatMatMPIAIJBACKEND *mmdata; 7103 ISLocalToGlobalMapping P_oth_l2g = NULL; 7104 IS glob = NULL; 7105 const char *prefix; 7106 char pprefix[256]; 7107 const PetscInt *globidx, *P_oth_idx; 7108 PetscInt i, j, cp, m, 
n, M, N, *coo_i, *coo_j; 7109 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7110 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7111 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7112 /* a base offset; type-2: sparse with a local to global map table */ 7113 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7114 7115 MatProductType ptype; 7116 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7117 PetscMPIInt size; 7118 7119 PetscFunctionBegin; 7120 MatCheckProduct(C, 1); 7121 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7122 ptype = product->type; 7123 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7124 ptype = MATPRODUCT_AB; 7125 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7126 } 7127 switch (ptype) { 7128 case MATPRODUCT_AB: 7129 A = product->A; 7130 P = product->B; 7131 m = A->rmap->n; 7132 n = P->cmap->n; 7133 M = A->rmap->N; 7134 N = P->cmap->N; 7135 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7136 break; 7137 case MATPRODUCT_AtB: 7138 P = product->A; 7139 A = product->B; 7140 m = P->cmap->n; 7141 n = A->cmap->n; 7142 M = P->cmap->N; 7143 N = A->cmap->N; 7144 hasoffproc = PETSC_TRUE; 7145 break; 7146 case MATPRODUCT_PtAP: 7147 A = product->A; 7148 P = product->B; 7149 m = P->cmap->n; 7150 n = P->cmap->n; 7151 M = P->cmap->N; 7152 N = P->cmap->N; 7153 hasoffproc = PETSC_TRUE; 7154 break; 7155 default: 7156 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7157 } 7158 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7159 if (size == 1) hasoffproc = PETSC_FALSE; 7160 7161 /* defaults */ 7162 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7163 mp[i] = NULL; 7164 mptmp[i] = PETSC_FALSE; 7165 rmapt[i] = -1; 7166 cmapt[i] = -1; 7167 rmapa[i] = NULL; 7168 cmapa[i] = NULL; 7169 } 7170 7171 /* customization */ 7172 PetscCall(PetscNew(&mmdata)); 7173 mmdata->reusesym = product->api_user; 7174 if (ptype == MATPRODUCT_AB) { 7175 if (product->api_user) { 7176 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7177 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7178 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7179 PetscOptionsEnd(); 7180 } else { 7181 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7182 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7183 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7184 PetscOptionsEnd(); 7185 } 7186 } else if (ptype == MATPRODUCT_PtAP) { 7187 if (product->api_user) { 7188 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7189 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, 
&mmdata->P_oth_bind, NULL)); 7190 PetscOptionsEnd(); 7191 } else { 7192 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7193 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7194 PetscOptionsEnd(); 7195 } 7196 } 7197 a = (Mat_MPIAIJ *)A->data; 7198 p = (Mat_MPIAIJ *)P->data; 7199 PetscCall(MatSetSizes(C, m, n, M, N)); 7200 PetscCall(PetscLayoutSetUp(C->rmap)); 7201 PetscCall(PetscLayoutSetUp(C->cmap)); 7202 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7203 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7204 7205 cp = 0; 7206 switch (ptype) { 7207 case MATPRODUCT_AB: /* A * P */ 7208 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7209 7210 /* A_diag * P_local (merged or not) */ 7211 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7212 /* P is product->B */ 7213 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7214 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7215 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7216 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7217 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7218 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7219 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7220 mp[cp]->product->api_user = product->api_user; 7221 PetscCall(MatProductSetFromOptions(mp[cp])); 7222 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7223 PetscCall(ISGetIndices(glob, &globidx)); 7224 rmapt[cp] = 1; 7225 cmapt[cp] = 2; 7226 cmapa[cp] = globidx; 7227 mptmp[cp] = PETSC_FALSE; 7228 cp++; 7229 } else { /* A_diag * P_diag and A_diag * P_off */ 7230 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7231 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7232 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7233 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7234 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7235 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7236 mp[cp]->product->api_user = product->api_user; 7237 PetscCall(MatProductSetFromOptions(mp[cp])); 7238 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7239 rmapt[cp] = 1; 7240 cmapt[cp] = 1; 7241 mptmp[cp] = PETSC_FALSE; 7242 cp++; 7243 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7244 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7245 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7246 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7247 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7248 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7249 mp[cp]->product->api_user = product->api_user; 7250 PetscCall(MatProductSetFromOptions(mp[cp])); 7251 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7252 rmapt[cp] = 1; 7253 cmapt[cp] = 2; 7254 cmapa[cp] = p->garray; 7255 mptmp[cp] = PETSC_FALSE; 7256 cp++; 7257 } 7258 7259 /* A_off * P_other */ 7260 if (mmdata->P_oth) { 7261 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7262 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7263 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7264 
PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7265 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7266 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7267 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7268 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7269 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7270 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7271 mp[cp]->product->api_user = product->api_user; 7272 PetscCall(MatProductSetFromOptions(mp[cp])); 7273 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7274 rmapt[cp] = 1; 7275 cmapt[cp] = 2; 7276 cmapa[cp] = P_oth_idx; 7277 mptmp[cp] = PETSC_FALSE; 7278 cp++; 7279 } 7280 break; 7281 7282 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7283 /* A is product->B */ 7284 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7285 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7286 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7287 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7288 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7289 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7290 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7291 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7292 mp[cp]->product->api_user = product->api_user; 7293 PetscCall(MatProductSetFromOptions(mp[cp])); 7294 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7295 PetscCall(ISGetIndices(glob, &globidx)); 7296 rmapt[cp] = 2; 7297 rmapa[cp] = globidx; 7298 cmapt[cp] = 2; 7299 cmapa[cp] = globidx; 7300 mptmp[cp] = PETSC_FALSE; 7301 cp++; 7302 } else { 7303 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7304 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7305 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7306 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7307 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7308 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7309 mp[cp]->product->api_user = product->api_user; 7310 PetscCall(MatProductSetFromOptions(mp[cp])); 7311 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7312 PetscCall(ISGetIndices(glob, &globidx)); 7313 rmapt[cp] = 1; 7314 cmapt[cp] = 2; 7315 cmapa[cp] = globidx; 7316 mptmp[cp] = PETSC_FALSE; 7317 cp++; 7318 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7319 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7320 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7321 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7322 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7323 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7324 mp[cp]->product->api_user = product->api_user; 7325 PetscCall(MatProductSetFromOptions(mp[cp])); 7326 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7327 rmapt[cp] = 2; 7328 rmapa[cp] = p->garray; 7329 cmapt[cp] = 2; 7330 cmapa[cp] = globidx; 7331 mptmp[cp] = PETSC_FALSE; 7332 cp++; 7333 } 7334 break; 7335 case MATPRODUCT_PtAP: 7336 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7337 /* P is product->B */ 7338 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7339 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7340 
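    /* mp[cp] computes the local contribution Bloc^T * A_diag * Bloc, where Bloc merges the diagonal and off-diagonal blocks of P (obtained just above with MatMPIAIJGetLocalMatMerge()) */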
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7341 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7342 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7343 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7344 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7345 mp[cp]->product->api_user = product->api_user; 7346 PetscCall(MatProductSetFromOptions(mp[cp])); 7347 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7348 PetscCall(ISGetIndices(glob, &globidx)); 7349 rmapt[cp] = 2; 7350 rmapa[cp] = globidx; 7351 cmapt[cp] = 2; 7352 cmapa[cp] = globidx; 7353 mptmp[cp] = PETSC_FALSE; 7354 cp++; 7355 if (mmdata->P_oth) { 7356 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7357 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7358 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7359 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7360 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7361 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7362 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7363 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7364 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7365 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7366 mp[cp]->product->api_user = product->api_user; 7367 PetscCall(MatProductSetFromOptions(mp[cp])); 7368 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7369 mptmp[cp] = PETSC_TRUE; 7370 cp++; 7371 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7372 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7373 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7374 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7375 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7376 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7377 mp[cp]->product->api_user = product->api_user; 7378 PetscCall(MatProductSetFromOptions(mp[cp])); 7379 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7380 rmapt[cp] = 2; 7381 rmapa[cp] = globidx; 7382 cmapt[cp] = 2; 7383 cmapa[cp] = P_oth_idx; 7384 mptmp[cp] = PETSC_FALSE; 7385 cp++; 7386 } 7387 break; 7388 default: 7389 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7390 } 7391 /* sanity check */ 7392 if (size > 1) 7393 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7394 7395 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7396 for (i = 0; i < cp; i++) { 7397 mmdata->mp[i] = mp[i]; 7398 mmdata->mptmp[i] = mptmp[i]; 7399 } 7400 mmdata->cp = cp; 7401 C->product->data = mmdata; 7402 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7403 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7404 7405 /* memory type */ 7406 mmdata->mtype = PETSC_MEMTYPE_HOST; 7407 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7408 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7409 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7410 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7411 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7412 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7413 7414 /* prepare coo 
coordinates for values insertion */ 7415 7416 /* count total nonzeros of those intermediate seqaij Mats 7417 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7418 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted on remote procs 7419 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7420 */ 7421 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7422 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7423 if (mptmp[cp]) continue; 7424 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */ 7425 const PetscInt *rmap = rmapa[cp]; 7426 const PetscInt mr = mp[cp]->rmap->n; 7427 const PetscInt rs = C->rmap->rstart; 7428 const PetscInt re = C->rmap->rend; 7429 const PetscInt *ii = mm->i; 7430 for (i = 0; i < mr; i++) { 7431 const PetscInt gr = rmap[i]; 7432 const PetscInt nz = ii[i + 1] - ii[i]; 7433 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7434 else ncoo_oown += nz; /* this row is local */ 7435 } 7436 } else ncoo_d += mm->nz; 7437 } 7438 7439 /* 7440 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7441 7442 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this proc by other procs. 7443 7444 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7445 7446 off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert on other procs 7447 own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally 7448 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other procs. 7449 7450 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7451 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros this proc will receive.
7452 */ 7453 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7454 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7455 7456 /* gather (i,j) of nonzeros inserted by remote procs */ 7457 if (hasoffproc) { 7458 PetscSF msf; 7459 PetscInt ncoo2, *coo_i2, *coo_j2; 7460 7461 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7462 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7463 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7464 7465 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7466 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7467 PetscInt *idxoff = mmdata->off[cp]; 7468 PetscInt *idxown = mmdata->own[cp]; 7469 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7470 const PetscInt *rmap = rmapa[cp]; 7471 const PetscInt *cmap = cmapa[cp]; 7472 const PetscInt *ii = mm->i; 7473 PetscInt *coi = coo_i + ncoo_o; 7474 PetscInt *coj = coo_j + ncoo_o; 7475 const PetscInt mr = mp[cp]->rmap->n; 7476 const PetscInt rs = C->rmap->rstart; 7477 const PetscInt re = C->rmap->rend; 7478 const PetscInt cs = C->cmap->rstart; 7479 for (i = 0; i < mr; i++) { 7480 const PetscInt *jj = mm->j + ii[i]; 7481 const PetscInt gr = rmap[i]; 7482 const PetscInt nz = ii[i + 1] - ii[i]; 7483 if (gr < rs || gr >= re) { /* this is an offproc row */ 7484 for (j = ii[i]; j < ii[i + 1]; j++) { 7485 *coi++ = gr; 7486 *idxoff++ = j; 7487 } 7488 if (!cmapt[cp]) { /* already global */ 7489 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7490 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7491 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7492 } else { /* offdiag */ 7493 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7494 } 7495 ncoo_o += nz; 7496 } else { /* this is a local row */ 7497 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7498 } 7499 } 7500 } 7501 mmdata->off[cp + 1] = idxoff; 7502 mmdata->own[cp + 1] = idxown; 7503 } 7504 7505 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7506 PetscInt incoo_o; 7507 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7508 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7509 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7510 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7511 ncoo = ncoo_d + ncoo_oown + ncoo2; 7512 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7513 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7514 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7515 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7516 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7517 PetscCall(PetscFree2(coo_i, coo_j)); 7518 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7519 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7520 coo_i = coo_i2; 7521 coo_j = coo_j2; 7522 } else { /* no offproc values insertion */ 7523 ncoo = ncoo_d; 7524 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7525 7526 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7527 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7528 PetscCall(PetscSFSetUp(mmdata->sf)); 7529 } 7530 
mmdata->hasoffproc = hasoffproc; 7531 7532 /* gather (i,j) of nonzeros inserted locally */ 7533 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7534 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7535 PetscInt *coi = coo_i + ncoo_d; 7536 PetscInt *coj = coo_j + ncoo_d; 7537 const PetscInt *jj = mm->j; 7538 const PetscInt *ii = mm->i; 7539 const PetscInt *cmap = cmapa[cp]; 7540 const PetscInt *rmap = rmapa[cp]; 7541 const PetscInt mr = mp[cp]->rmap->n; 7542 const PetscInt rs = C->rmap->rstart; 7543 const PetscInt re = C->rmap->rend; 7544 const PetscInt cs = C->cmap->rstart; 7545 7546 if (mptmp[cp]) continue; 7547 if (rmapt[cp] == 1) { /* consecutive rows */ 7548 /* fill coo_i */ 7549 for (i = 0; i < mr; i++) { 7550 const PetscInt gr = i + rs; 7551 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7552 } 7553 /* fill coo_j */ 7554 if (!cmapt[cp]) { /* type-0, already global */ 7555 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7556 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7557 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7558 } else { /* type-2, local to global for sparse columns */ 7559 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7560 } 7561 ncoo_d += mm->nz; 7562 } else if (rmapt[cp] == 2) { /* sparse rows */ 7563 for (i = 0; i < mr; i++) { 7564 const PetscInt *jj = mm->j + ii[i]; 7565 const PetscInt gr = rmap[i]; 7566 const PetscInt nz = ii[i + 1] - ii[i]; 7567 if (gr >= rs && gr < re) { /* local rows */ 7568 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7569 if (!cmapt[cp]) { /* type-0, already global */ 7570 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7571 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7572 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7573 } else { /* type-2, local to global for sparse columns */ 7574 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7575 } 7576 ncoo_d += nz; 7577 } 7578 } 7579 } 7580 } 7581 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7582 PetscCall(ISDestroy(&glob)); 7583 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7584 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7585 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7586 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7587 7588 /* set block sizes */ 7589 A = product->A; 7590 P = product->B; 7591 switch (ptype) { 7592 case MATPRODUCT_PtAP: 7593 PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs)); 7594 break; 7595 case MATPRODUCT_RARt: 7596 PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs)); 7597 break; 7598 case MATPRODUCT_ABC: 7599 PetscCall(MatSetBlockSizesFromMats(C, A, product->C)); 7600 break; 7601 case MATPRODUCT_AB: 7602 PetscCall(MatSetBlockSizesFromMats(C, A, P)); 7603 break; 7604 case MATPRODUCT_AtB: 7605 PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs)); 7606 break; 7607 case MATPRODUCT_ABt: 7608 PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs)); 7609 break; 7610 default: 7611 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]); 7612 } 7613 7614 /* preallocate with COO data */ 7615 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7616 PetscCall(PetscFree2(coo_i, coo_j)); 7617 PetscFunctionReturn(PETSC_SUCCESS); 7618 } 7619 7620 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7621 { 7622 Mat_Product *product = 
mat->product; 7623 #if defined(PETSC_HAVE_DEVICE) 7624 PetscBool match = PETSC_FALSE; 7625 PetscBool usecpu = PETSC_FALSE; 7626 #else 7627 PetscBool match = PETSC_TRUE; 7628 #endif 7629 7630 PetscFunctionBegin; 7631 MatCheckProduct(mat, 1); 7632 #if defined(PETSC_HAVE_DEVICE) 7633 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7634 if (match) { /* we can always fallback to the CPU if requested */ 7635 switch (product->type) { 7636 case MATPRODUCT_AB: 7637 if (product->api_user) { 7638 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7639 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7640 PetscOptionsEnd(); 7641 } else { 7642 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7643 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7644 PetscOptionsEnd(); 7645 } 7646 break; 7647 case MATPRODUCT_AtB: 7648 if (product->api_user) { 7649 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7650 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7651 PetscOptionsEnd(); 7652 } else { 7653 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7654 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7655 PetscOptionsEnd(); 7656 } 7657 break; 7658 case MATPRODUCT_PtAP: 7659 if (product->api_user) { 7660 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7661 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7662 PetscOptionsEnd(); 7663 } else { 7664 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7665 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7666 PetscOptionsEnd(); 7667 } 7668 break; 7669 default: 7670 break; 7671 } 7672 match = (PetscBool)!usecpu; 7673 } 7674 #endif 7675 if (match) { 7676 switch (product->type) { 7677 case MATPRODUCT_AB: 7678 case MATPRODUCT_AtB: 7679 case MATPRODUCT_PtAP: 7680 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7681 break; 7682 default: 7683 break; 7684 } 7685 } 7686 /* fallback to MPIAIJ ops */ 7687 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7688 PetscFunctionReturn(PETSC_SUCCESS); 7689 } 7690 7691 /* 7692 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7693 7694 n - the number of block indices in cc[] 7695 cc - the block indices (must be large enough to contain the indices) 7696 */ 7697 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7698 { 7699 PetscInt cnt = -1, nidx, j; 7700 const PetscInt *idx; 7701 7702 PetscFunctionBegin; 7703 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7704 if (nidx) { 7705 cnt = 0; 7706 cc[cnt] = idx[0] / bs; 7707 for (j = 1; j < nidx; j++) { 7708 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7709 } 7710 } 7711 
PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7712 *n = cnt + 1; 7713 PetscFunctionReturn(PETSC_SUCCESS); 7714 } 7715 7716 /* 7717 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7718 7719 ncollapsed - the number of block indices 7720 collapsed - the block indices (must be large enough to contain the indices) 7721 */ 7722 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7723 { 7724 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7725 7726 PetscFunctionBegin; 7727 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7728 for (i = start + 1; i < start + bs; i++) { 7729 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7730 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7731 cprevtmp = cprev; 7732 cprev = merged; 7733 merged = cprevtmp; 7734 } 7735 *ncollapsed = nprev; 7736 if (collapsed) *collapsed = cprev; 7737 PetscFunctionReturn(PETSC_SUCCESS); 7738 } 7739 7740 /* 7741 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7742 7743 Input Parameter: 7744 . Amat - matrix 7745 - symmetrize - make the result symmetric 7746 + scale - scale with diagonal 7747 7748 Output Parameter: 7749 . a_Gmat - output scalar graph >= 0 7750 7751 */ 7752 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7753 { 7754 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7755 MPI_Comm comm; 7756 Mat Gmat; 7757 PetscBool ismpiaij, isseqaij; 7758 Mat a, b, c; 7759 MatType jtype; 7760 7761 PetscFunctionBegin; 7762 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7763 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7764 PetscCall(MatGetSize(Amat, &MM, &NN)); 7765 PetscCall(MatGetBlockSize(Amat, &bs)); 7766 nloc = (Iend - Istart) / bs; 7767 7768 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7769 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7770 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7771 7772 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7773 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7774 implementation */ 7775 if (bs > 1) { 7776 PetscCall(MatGetType(Amat, &jtype)); 7777 PetscCall(MatCreate(comm, &Gmat)); 7778 PetscCall(MatSetType(Gmat, jtype)); 7779 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7780 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7781 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7782 PetscInt *d_nnz, *o_nnz; 7783 MatScalar *aa, val, *AA; 7784 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7785 7786 if (isseqaij) { 7787 a = Amat; 7788 b = NULL; 7789 } else { 7790 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7791 a = d->A; 7792 b = d->B; 7793 } 7794 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7795 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7796 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7797 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7798 const PetscInt *cols1, *cols2; 7799 7800 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7801 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7802 nnz[brow / bs] = nc2 / bs; 7803 if (nc2 % bs) ok = 0; 7804 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7805 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7806 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7807 if (nc1 != nc2) ok = 0; 7808 else { 7809 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7810 if (cols1[jj] != cols2[jj]) ok = 0; 7811 if (cols1[jj] % bs != jj % bs) ok = 0; 7812 } 7813 } 7814 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7815 } 7816 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7817 if (!ok) { 7818 PetscCall(PetscFree2(d_nnz, o_nnz)); 7819 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7820 goto old_bs; 7821 } 7822 } 7823 } 7824 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7825 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7826 PetscCall(PetscFree2(d_nnz, o_nnz)); 7827 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7828 // diag 7829 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7830 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7831 7832 ai = aseq->i; 7833 n = ai[brow + 1] - ai[brow]; 7834 aj = aseq->j + ai[brow]; 7835 for (PetscInt k = 0; k < n; k += bs) { // block columns 7836 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7837 val = 0; 7838 if (index_size == 0) { 7839 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7840 aa = aseq->a + ai[brow + ii] + k; 7841 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7842 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7843 } 7844 } 7845 } else { // use (index,index) value if provided 7846 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7847 PetscInt ii = index[iii]; 7848 aa = aseq->a + ai[brow + ii] + k; 7849 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7850 PetscInt jj = index[jjj]; 7851 val += PetscAbs(PetscRealPart(aa[jj])); 7852 } 7853 } 7854 } 7855 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7856 AA[k / bs] = val; 7857 } 7858 grow = Istart / bs + brow / bs; 7859 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7860 } 7861 // off-diag 7862 if (ismpiaij) { 7863 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7864 const PetscScalar *vals; 7865 const PetscInt *cols, *garray = aij->garray; 7866 7867 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7868 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7869 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7870 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7871 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7872 AA[k / bs] = 0; 7873 AJ[cidx] = garray[cols[k]] / bs; 7874 } 7875 nc = ncols / bs; 7876 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7877 if (index_size == 0) { 7878 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7879 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7880 for (PetscInt k = 0; k < ncols; k += bs) { 7881 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7882 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7883 AA[k / bs] 
+= PetscAbs(PetscRealPart(vals[k + jj])); 7884 } 7885 } 7886 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7887 } 7888 } else { // use (index,index) value if provided 7889 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7890 PetscInt ii = index[iii]; 7891 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7892 for (PetscInt k = 0; k < ncols; k += bs) { 7893 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7894 PetscInt jj = index[jjj]; 7895 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7896 } 7897 } 7898 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7899 } 7900 } 7901 grow = Istart / bs + brow / bs; 7902 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7903 } 7904 } 7905 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7906 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7907 PetscCall(PetscFree2(AA, AJ)); 7908 } else { 7909 const PetscScalar *vals; 7910 const PetscInt *idx; 7911 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7912 old_bs: 7913 /* 7914 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7915 */ 7916 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7917 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7918 if (isseqaij) { 7919 PetscInt max_d_nnz; 7920 7921 /* 7922 Determine exact preallocation count for (sequential) scalar matrix 7923 */ 7924 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7925 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7926 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7927 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7928 PetscCall(PetscFree3(w0, w1, w2)); 7929 } else if (ismpiaij) { 7930 Mat Daij, Oaij; 7931 const PetscInt *garray; 7932 PetscInt max_d_nnz; 7933 7934 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7935 /* 7936 Determine exact preallocation count for diagonal block portion of scalar matrix 7937 */ 7938 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7939 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7940 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7941 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7942 PetscCall(PetscFree3(w0, w1, w2)); 7943 /* 7944 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7945 */ 7946 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7947 o_nnz[jj] = 0; 7948 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7949 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7950 o_nnz[jj] += ncols; 7951 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7952 } 7953 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7954 } 7955 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7956 /* get scalar copy (norms) of matrix */ 7957 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7958 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7959 PetscCall(PetscFree2(d_nnz, o_nnz)); 7960 for (Ii = Istart; Ii < Iend; Ii++) { 7961 PetscInt dest_row = Ii / bs; 7962 7963 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7964 for (jj = 0; jj < ncols; jj++) { 7965 PetscInt dest_col = idx[jj] / bs; 7966 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7967 7968 PetscCall(MatSetValues(Gmat, 1, 
&dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7969 } 7970 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7971 } 7972 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7973 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7974 } 7975 } else { 7976 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7977 else { 7978 Gmat = Amat; 7979 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7980 } 7981 if (isseqaij) { 7982 a = Gmat; 7983 b = NULL; 7984 } else { 7985 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7986 a = d->A; 7987 b = d->B; 7988 } 7989 if (filter >= 0 || scale) { 7990 /* take absolute value of each entry */ 7991 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7992 MatInfo info; 7993 PetscScalar *avals; 7994 7995 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7996 PetscCall(MatSeqAIJGetArray(c, &avals)); 7997 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7998 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7999 } 8000 } 8001 } 8002 if (symmetrize) { 8003 PetscBool isset, issym; 8004 8005 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8006 if (!isset || !issym) { 8007 Mat matTrans; 8008 8009 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8010 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8011 PetscCall(MatDestroy(&matTrans)); 8012 } 8013 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8014 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8015 if (scale) { 8016 /* scale c for all diagonal values = 1 or -1 */ 8017 Vec diag; 8018 8019 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8020 PetscCall(MatGetDiagonal(Gmat, diag)); 8021 PetscCall(VecReciprocal(diag)); 8022 PetscCall(VecSqrtAbs(diag)); 8023 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8024 PetscCall(VecDestroy(&diag)); 8025 } 8026 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8027 if (filter >= 0) { 8028 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8029 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8030 } 8031 *a_Gmat = Gmat; 8032 PetscFunctionReturn(PETSC_SUCCESS); 8033 } 8034 8035 PETSC_INTERN PetscErrorCode MatGetCurrentMemType_MPIAIJ(Mat A, PetscMemType *memtype) 8036 { 8037 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 8038 PetscMemType mD = PETSC_MEMTYPE_HOST, mO = PETSC_MEMTYPE_HOST; 8039 8040 PetscFunctionBegin; 8041 if (mpiaij->A) PetscCall(MatGetCurrentMemType(mpiaij->A, &mD)); 8042 if (mpiaij->B) PetscCall(MatGetCurrentMemType(mpiaij->B, &mO)); 8043 *memtype = (mD == mO) ? mD : PETSC_MEMTYPE_HOST; 8044 PetscFunctionReturn(PETSC_SUCCESS); 8045 } 8046 8047 /* 8048 Special version for direct calls from Fortran 8049 */ 8050 8051 /* Change these macros so can be used in void function */ 8052 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8053 #undef PetscCall 8054 #define PetscCall(...) \ 8055 do { \ 8056 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8057 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8058 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8059 return; \ 8060 } \ 8061 } while (0) 8062 8063 #undef SETERRQ 8064 #define SETERRQ(comm, ierr, ...) 
\ 8065 do { \ 8066 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8067 return; \ 8068 } while (0) 8069 8070 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8071 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8072 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8073 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8074 #else 8075 #endif 8076 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8077 { 8078 Mat mat = *mmat; 8079 PetscInt m = *mm, n = *mn; 8080 InsertMode addv = *maddv; 8081 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8082 PetscScalar value; 8083 8084 MatCheckPreallocated(mat, 1); 8085 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8086 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8087 { 8088 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8089 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8090 PetscBool roworiented = aij->roworiented; 8091 8092 /* Some Variables required in the macro */ 8093 Mat A = aij->A; 8094 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8095 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8096 MatScalar *aa; 8097 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8098 Mat B = aij->B; 8099 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8100 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8101 MatScalar *ba; 8102 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8103 * cannot use "#if defined" inside a macro. 
*/ 8104 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8105 8106 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8107 PetscInt nonew = a->nonew; 8108 MatScalar *ap1, *ap2; 8109 8110 PetscFunctionBegin; 8111 PetscCall(MatSeqAIJGetArray(A, &aa)); 8112 PetscCall(MatSeqAIJGetArray(B, &ba)); 8113 for (i = 0; i < m; i++) { 8114 if (im[i] < 0) continue; 8115 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8116 if (im[i] >= rstart && im[i] < rend) { 8117 row = im[i] - rstart; 8118 lastcol1 = -1; 8119 rp1 = aj + ai[row]; 8120 ap1 = aa + ai[row]; 8121 rmax1 = aimax[row]; 8122 nrow1 = ailen[row]; 8123 low1 = 0; 8124 high1 = nrow1; 8125 lastcol2 = -1; 8126 rp2 = bj + bi[row]; 8127 ap2 = ba + bi[row]; 8128 rmax2 = bimax[row]; 8129 nrow2 = bilen[row]; 8130 low2 = 0; 8131 high2 = nrow2; 8132 8133 for (j = 0; j < n; j++) { 8134 if (roworiented) value = v[i * n + j]; 8135 else value = v[i + j * m]; 8136 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8137 if (in[j] >= cstart && in[j] < cend) { 8138 col = in[j] - cstart; 8139 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8140 } else if (in[j] < 0) continue; 8141 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8142 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8143 } else { 8144 if (mat->was_assembled) { 8145 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8146 #if defined(PETSC_USE_CTABLE) 8147 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8148 col--; 8149 #else 8150 col = aij->colmap[in[j]] - 1; 8151 #endif 8152 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8153 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8154 col = in[j]; 8155 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8156 B = aij->B; 8157 b = (Mat_SeqAIJ *)B->data; 8158 bimax = b->imax; 8159 bi = b->i; 8160 bilen = b->ilen; 8161 bj = b->j; 8162 rp2 = bj + bi[row]; 8163 ap2 = ba + bi[row]; 8164 rmax2 = bimax[row]; 8165 nrow2 = bilen[row]; 8166 low2 = 0; 8167 high2 = nrow2; 8168 bm = aij->B->rmap->n; 8169 ba = b->a; 8170 inserted = PETSC_FALSE; 8171 } 8172 } else col = in[j]; 8173 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8174 } 8175 } 8176 } else if (!aij->donotstash) { 8177 if (roworiented) { 8178 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8179 } else { 8180 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8181 } 8182 } 8183 } 8184 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8185 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8186 } 8187 PetscFunctionReturnVoid(); 8188 } 8189 8190 /* Undefining these here since they were redefined from their original definition above! No 8191 * other PETSc functions should be defined past this point, as it is impossible to recover the 8192 * original definitions */ 8193 #undef PetscCall 8194 #undef SETERRQ 8195