1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 10 #define TYPE AIJ 11 #define TYPE_AIJ 12 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 13 #undef TYPE 14 #undef TYPE_AIJ 15 16 static PetscErrorCode MatReset_MPIAIJ(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 19 20 PetscFunctionBegin; 21 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 22 PetscCall(MatStashDestroy_Private(&mat->stash)); 23 PetscCall(VecDestroy(&aij->diag)); 24 PetscCall(MatDestroy(&aij->A)); 25 PetscCall(MatDestroy(&aij->B)); 26 #if defined(PETSC_USE_CTABLE) 27 PetscCall(PetscHMapIDestroy(&aij->colmap)); 28 #else 29 PetscCall(PetscFree(aij->colmap)); 30 #endif 31 PetscCall(PetscFree(aij->garray)); 32 PetscCall(VecDestroy(&aij->lvec)); 33 PetscCall(VecScatterDestroy(&aij->Mvctx)); 34 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 35 PetscCall(PetscFree(aij->ld)); 36 PetscFunctionReturn(PETSC_SUCCESS); 37 } 38 39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 42 /* Save the nonzero states of the component matrices because those are what are used to determine 43 the nonzero state of mat */ 44 PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate; 45 46 PetscFunctionBegin; 47 PetscCall(MatReset_MPIAIJ(mat)); 48 PetscCall(MatSetUp_MPI_Hash(mat)); 49 aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate; 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 54 { 55 PetscFunctionBegin; 56 PetscCall(MatReset_MPIAIJ(mat)); 57 58 PetscCall(PetscFree(mat->data)); 59 60 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 61 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 62 63 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 74 #if defined(PETSC_HAVE_CUDA) 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 76 #endif 77 #if defined(PETSC_HAVE_HIP) 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 79 #endif 80 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 139 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 140 for communicators controlling multiple processes. 
It is recommended that you call both of 141 the above preallocation routines for simplicity. 142 143 Options Database Key: 144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 145 146 Developer Note: 147 Level: beginner 148 149 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 150 enough exist. 151 152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 153 M*/ 154 155 /*MC 156 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 157 158 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 159 and `MATMPIAIJCRL` otherwise. As a result, for single process communicators, 160 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 161 for communicators controlling multiple processes. It is recommended that you call both of 162 the above preallocation routines for simplicity. 163 164 Options Database Key: 165 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 166 167 Level: beginner 168 169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 170 M*/ 171 172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 173 { 174 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 175 176 PetscFunctionBegin; 177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 178 A->boundtocpu = flg; 179 #endif 180 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 181 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 182 183 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 184 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 185 * to differ from the parent matrix. 
*/ 186 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 187 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 192 { 193 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 194 195 PetscFunctionBegin; 196 if (mat->A) { 197 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 198 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 199 } 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 204 { 205 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 207 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 208 const PetscInt *ia, *ib; 209 const MatScalar *aa, *bb, *aav, *bav; 210 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 211 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 212 213 PetscFunctionBegin; 214 *keptrows = NULL; 215 216 ia = a->i; 217 ib = b->i; 218 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 220 for (i = 0; i < m; i++) { 221 na = ia[i + 1] - ia[i]; 222 nb = ib[i + 1] - ib[i]; 223 if (!na && !nb) { 224 cnt++; 225 goto ok1; 226 } 227 aa = aav + ia[i]; 228 for (j = 0; j < na; j++) { 229 if (aa[j] != 0.0) goto ok1; 230 } 231 bb = PetscSafePointerPlusOffset(bav, ib[i]); 232 for (j = 0; j < nb; j++) { 233 if (bb[j] != 0.0) goto ok1; 234 } 235 cnt++; 236 ok1:; 237 } 238 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 239 if (!n0rows) { 240 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 241 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 242 PetscFunctionReturn(PETSC_SUCCESS); 243 } 244 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 245 cnt = 0; 246 for (i = 0; i < m; i++) { 247 na = ia[i + 1] - ia[i]; 248 nb = ib[i + 1] - ib[i]; 249 if (!na && !nb) continue; 250 aa = aav + ia[i]; 251 for (j = 0; j < na; j++) { 252 if (aa[j] != 0.0) { 253 rows[cnt++] = rstart + i; 254 goto ok2; 255 } 256 } 257 bb = PetscSafePointerPlusOffset(bav, ib[i]); 258 for (j = 0; j < nb; j++) { 259 if (bb[j] != 0.0) { 260 rows[cnt++] = rstart + i; 261 goto ok2; 262 } 263 } 264 ok2:; 265 } 266 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 267 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 268 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 275 PetscBool cong; 276 277 PetscFunctionBegin; 278 PetscCall(MatHasCongruentLayouts(Y, &cong)); 279 if (Y->assembled && cong) { 280 PetscCall(MatDiagonalSet(aij->A, D, is)); 281 } else { 282 PetscCall(MatDiagonalSet_Default(Y, D, is)); 283 } 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 288 { 289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 290 PetscInt i, rstart, nrows, *rows; 291 292 PetscFunctionBegin; 293 *zrows = NULL; 294 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 295 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 296 for (i = 0; i < nrows; i++) rows[i] += rstart; 297 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal 
*reductions) 302 { 303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 304 PetscInt i, m, n, *garray = aij->garray; 305 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 306 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 307 PetscReal *work; 308 const PetscScalar *dummy; 309 310 PetscFunctionBegin; 311 PetscCall(MatGetSize(A, &m, &n)); 312 PetscCall(PetscCalloc1(n, &work)); 313 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 314 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 315 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 316 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 317 if (type == NORM_2) { 318 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 319 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 320 } else if (type == NORM_1) { 321 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 322 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 323 } else if (type == NORM_INFINITY) { 324 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 325 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 326 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 327 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 328 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 330 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 331 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 332 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 333 if (type == NORM_INFINITY) { 334 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 335 } else { 336 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 337 } 338 PetscCall(PetscFree(work)); 339 if (type == NORM_2) { 340 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 341 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 342 for (i = 0; i < n; i++) reductions[i] /= m; 343 } 344 PetscFunctionReturn(PETSC_SUCCESS); 345 } 346 347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 348 { 349 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 350 IS sis, gis; 351 const PetscInt *isis, *igis; 352 PetscInt n, *iis, nsis, ngis, rstart, i; 353 354 PetscFunctionBegin; 355 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 356 PetscCall(MatFindNonzeroRows(a->B, &gis)); 357 PetscCall(ISGetSize(gis, &ngis)); 358 PetscCall(ISGetSize(sis, &nsis)); 359 PetscCall(ISGetIndices(sis, &isis)); 360 PetscCall(ISGetIndices(gis, &igis)); 361 362 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 363 PetscCall(PetscArraycpy(iis, igis, ngis)); 364 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 365 n = ngis + nsis; 366 
PetscCall(PetscSortRemoveDupsInt(&n, iis)); 367 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 368 for (i = 0; i < n; i++) iis[i] += rstart; 369 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 370 371 PetscCall(ISRestoreIndices(sis, &isis)); 372 PetscCall(ISRestoreIndices(gis, &igis)); 373 PetscCall(ISDestroy(&sis)); 374 PetscCall(ISDestroy(&gis)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 /* 379 Local utility routine that creates a mapping from the global column 380 number to the local number in the off-diagonal part of the local 381 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 382 a slightly higher hash table cost; without it it is not scalable (each processor 383 has an order N integer array but is fast to access. 384 */ 385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 386 { 387 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 388 PetscInt n = aij->B->cmap->n, i; 389 390 PetscFunctionBegin; 391 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 392 #if defined(PETSC_USE_CTABLE) 393 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 394 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 395 #else 396 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 397 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 398 #endif 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 403 do { \ 404 if (col <= lastcol1) low1 = 0; \ 405 else high1 = nrow1; \ 406 lastcol1 = col; \ 407 while (high1 - low1 > 5) { \ 408 t = (low1 + high1) / 2; \ 409 if (rp1[t] > col) high1 = t; \ 410 else low1 = t; \ 411 } \ 412 for (_i = low1; _i < high1; _i++) { \ 413 if (rp1[_i] > col) break; \ 414 if (rp1[_i] == col) { \ 415 if (addv == ADD_VALUES) { \ 416 ap1[_i] += value; \ 417 /* Not sure LogFlops will slow dow the code or not */ \ 418 (void)PetscLogFlops(1.0); \ 419 } else ap1[_i] = value; \ 420 goto a_noinsert; \ 421 } \ 422 } \ 423 if (value == 0.0 && ignorezeroentries && row != col) { \ 424 low1 = 0; \ 425 high1 = nrow1; \ 426 goto a_noinsert; \ 427 } \ 428 if (nonew == 1) { \ 429 low1 = 0; \ 430 high1 = nrow1; \ 431 goto a_noinsert; \ 432 } \ 433 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 434 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 435 N = nrow1++ - 1; \ 436 a->nz++; \ 437 high1++; \ 438 /* shift up all the later entries in this row */ \ 439 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 440 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 441 rp1[_i] = col; \ 442 ap1[_i] = value; \ 443 a_noinsert:; \ 444 ailen[row] = nrow1; \ 445 } while (0) 446 447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 448 do { \ 449 if (col <= lastcol2) low2 = 0; \ 450 else high2 = nrow2; \ 451 lastcol2 = col; \ 452 while (high2 - low2 > 5) { \ 453 t = (low2 + high2) / 2; \ 454 if (rp2[t] > col) high2 = t; \ 455 else low2 = t; \ 456 } \ 457 for (_i = low2; _i < high2; _i++) { \ 458 if (rp2[_i] > col) break; \ 459 if (rp2[_i] == col) { \ 460 if (addv == ADD_VALUES) { \ 461 ap2[_i] += value; \ 462 (void)PetscLogFlops(1.0); \ 463 } else ap2[_i] = value; \ 464 goto b_noinsert; \ 465 } \ 466 
} \ 467 if (value == 0.0 && ignorezeroentries) { \ 468 low2 = 0; \ 469 high2 = nrow2; \ 470 goto b_noinsert; \ 471 } \ 472 if (nonew == 1) { \ 473 low2 = 0; \ 474 high2 = nrow2; \ 475 goto b_noinsert; \ 476 } \ 477 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 478 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 479 N = nrow2++ - 1; \ 480 b->nz++; \ 481 high2++; \ 482 /* shift up all the later entries in this row */ \ 483 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 484 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 485 rp2[_i] = col; \ 486 ap2[_i] = value; \ 487 b_noinsert:; \ 488 bilen[row] = nrow2; \ 489 } while (0) 490 491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 492 { 493 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 494 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 495 PetscInt l, *garray = mat->garray, diag; 496 PetscScalar *aa, *ba; 497 498 PetscFunctionBegin; 499 /* code only works for square matrices A */ 500 501 /* find size of row to the left of the diagonal part */ 502 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 503 row = row - diag; 504 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 505 if (garray[b->j[b->i[row] + l]] > diag) break; 506 } 507 if (l) { 508 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 509 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 510 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 511 } 512 513 /* diagonal part */ 514 if (a->i[row + 1] - a->i[row]) { 515 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 516 PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 517 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 518 } 519 520 /* right of diagonal part */ 521 if (b->i[row + 1] - b->i[row] - l) { 522 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 523 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 524 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 525 } 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 532 PetscScalar value = 0.0; 533 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 540 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 541 PetscBool ignorezeroentries = a->ignorezeroentries; 542 Mat B = aij->B; 543 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 544 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 545 MatScalar *aa, *ba; 546 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 547 PetscInt nonew; 548 MatScalar *ap1, *ap2; 549 550 PetscFunctionBegin; 551 PetscCall(MatSeqAIJGetArray(A, &aa)); 552 PetscCall(MatSeqAIJGetArray(B, &ba)); 553 for (i = 0; i < m; i++) { 554 if (im[i] < 0) continue; 555 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: 
row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 lastcol1 = -1; 559 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 560 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 561 rmax1 = aimax[row]; 562 nrow1 = ailen[row]; 563 low1 = 0; 564 high1 = nrow1; 565 lastcol2 = -1; 566 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 567 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 568 rmax2 = bimax[row]; 569 nrow2 = bilen[row]; 570 low2 = 0; 571 high2 = nrow2; 572 573 for (j = 0; j < n; j++) { 574 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 576 if (in[j] >= cstart && in[j] < cend) { 577 col = in[j] - cstart; 578 nonew = a->nonew; 579 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 580 } else if (in[j] < 0) { 581 continue; 582 } else { 583 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 584 if (mat->was_assembled) { 585 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 586 #if defined(PETSC_USE_CTABLE) 587 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 593 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */ 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ *)B->data; 598 bimax = b->imax; 599 bi = b->i; 600 bilen = b->ilen; 601 bj = b->j; 602 ba = b->a; 603 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 604 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 605 rmax2 = bimax[row]; 606 nrow2 = bilen[row]; 607 low2 = 0; 608 high2 = nrow2; 609 bm = aij->B->rmap->n; 610 ba = b->a; 611 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 612 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 613 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 614 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 615 } 616 } else col = in[j]; 617 nonew = b->nonew; 618 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 619 } 620 } 621 } else { 622 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 623 if (!aij->donotstash) { 624 mat->assembled = PETSC_FALSE; 625 if (roworiented) { 626 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 627 } else { 628 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 629 } 630 } 631 } 632 } 633 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. 
But we don't access them here */ 634 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 635 PetscFunctionReturn(PETSC_SUCCESS); 636 } 637 638 /* 639 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 640 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 641 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 642 */ 643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 644 { 645 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 646 Mat A = aij->A; /* diagonal part of the matrix */ 647 Mat B = aij->B; /* off-diagonal part of the matrix */ 648 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 649 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 650 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 651 PetscInt *ailen = a->ilen, *aj = a->j; 652 PetscInt *bilen = b->ilen, *bj = b->j; 653 PetscInt am = aij->A->rmap->n, j; 654 PetscInt diag_so_far = 0, dnz; 655 PetscInt offd_so_far = 0, onz; 656 657 PetscFunctionBegin; 658 /* Iterate over all rows of the matrix */ 659 for (j = 0; j < am; j++) { 660 dnz = onz = 0; 661 /* Iterate over all non-zero columns of the current row */ 662 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 663 /* If column is in the diagonal */ 664 if (mat_j[col] >= cstart && mat_j[col] < cend) { 665 aj[diag_so_far++] = mat_j[col] - cstart; 666 dnz++; 667 } else { /* off-diagonal entries */ 668 bj[offd_so_far++] = mat_j[col]; 669 onz++; 670 } 671 } 672 ailen[j] = dnz; 673 bilen[j] = onz; 674 } 675 PetscFunctionReturn(PETSC_SUCCESS); 676 } 677 678 /* 679 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 680 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 681 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 682 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 683 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 684 */ 685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 688 Mat A = aij->A; /* diagonal part of the matrix */ 689 Mat B = aij->B; /* off-diagonal part of the matrix */ 690 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 691 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 692 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 693 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 694 PetscInt *ailen = a->ilen, *aj = a->j; 695 PetscInt *bilen = b->ilen, *bj = b->j; 696 PetscInt am = aij->A->rmap->n, j; 697 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 698 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 699 PetscScalar *aa = a->a, *ba = b->a; 700 701 PetscFunctionBegin; 702 /* Iterate over all rows of the matrix */ 703 for (j = 0; j < am; j++) { 704 dnz_row = onz_row = 0; 705 rowstart_offd = full_offd_i[j]; 706 rowstart_diag = full_diag_i[j]; 707 /* Iterate over all non-zero columns of the current row */ 708 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 709 /* If column is in the diagonal */ 710 if (mat_j[col] >= cstart && mat_j[col] < cend) { 711 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 712 aa[rowstart_diag + dnz_row] = mat_a[col]; 713 dnz_row++; 714 } else { /* off-diagonal entries */ 715 bj[rowstart_offd + onz_row] = mat_j[col]; 716 ba[rowstart_offd + onz_row] = mat_a[col]; 717 onz_row++; 718 } 719 } 720 ailen[j] = dnz_row; 721 bilen[j] = onz_row; 722 } 723 PetscFunctionReturn(PETSC_SUCCESS); 724 } 725 726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 729 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 730 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 731 732 PetscFunctionBegin; 733 for (i = 0; i < m; i++) { 734 if (idxm[i] < 0) continue; /* negative row */ 735 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 736 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 737 row = idxm[i] - rstart; 738 for (j = 0; j < n; j++) { 739 if (idxn[j] < 0) continue; /* negative column */ 740 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 741 if (idxn[j] >= cstart && idxn[j] < cend) { 742 col = idxn[j] - cstart; 743 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 744 } else { 745 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 746 #if defined(PETSC_USE_CTABLE) 747 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 753 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 754 } 755 } 756 } 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 761 { 762 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 763 PetscInt nstash, reallocs; 764 765 PetscFunctionBegin; 766 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 767 768 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 769 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 770 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 771 PetscFunctionReturn(PETSC_SUCCESS); 772 } 773 774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 775 { 776 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 777 PetscMPIInt n; 778 PetscInt i, j, rstart, ncols, flg; 779 PetscInt *row, *col; 780 PetscBool other_disassembled; 781 PetscScalar *val; 782 783 /* do not use 'b 
= (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 784 785 PetscFunctionBegin; 786 if (!aij->donotstash && !mat->nooffprocentries) { 787 while (1) { 788 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 789 if (!flg) break; 790 791 for (i = 0; i < n;) { 792 /* Now identify the consecutive vals belonging to the same row */ 793 for (j = i, rstart = row[j]; j < n; j++) { 794 if (row[j] != rstart) break; 795 } 796 if (j < n) ncols = j - i; 797 else ncols = n - i; 798 /* Now assemble all these values with a single function call */ 799 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 800 i = j; 801 } 802 } 803 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 804 } 805 #if defined(PETSC_HAVE_DEVICE) 806 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 807 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 808 if (mat->boundtocpu) { 809 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 810 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 811 } 812 #endif 813 PetscCall(MatAssemblyBegin(aij->A, mode)); 814 PetscCall(MatAssemblyEnd(aij->A, mode)); 815 816 /* determine if any processor has disassembled, if so we must 817 also disassemble ourself, in order that we may reassemble. */ 818 /* 819 if nonzero structure of submatrix B cannot change then we know that 820 no processor disassembled thus we can skip this stuff 821 */ 822 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 823 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 824 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 825 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 826 } 827 } 828 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 829 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 830 #if defined(PETSC_HAVE_DEVICE) 831 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 832 #endif 833 PetscCall(MatAssemblyBegin(aij->B, mode)); 834 PetscCall(MatAssemblyEnd(aij->B, mode)); 835 836 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 837 838 aij->rowvalues = NULL; 839 840 PetscCall(VecDestroy(&aij->diag)); 841 842 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 843 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 844 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 845 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 846 } 847 #if defined(PETSC_HAVE_DEVICE) 848 mat->offloadmask = PETSC_OFFLOAD_BOTH; 849 #endif 850 PetscFunctionReturn(PETSC_SUCCESS); 851 } 852 853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 854 { 855 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 856 857 PetscFunctionBegin; 858 PetscCall(MatZeroEntries(l->A)); 859 PetscCall(MatZeroEntries(l->B)); 860 PetscFunctionReturn(PETSC_SUCCESS); 861 } 862 863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 864 { 865 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 866 PetscInt *lrows; 867 PetscInt r, len; 868 
PetscBool cong; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 873 PetscCall(MatHasCongruentLayouts(A, &cong)); 874 /* fix right-hand side if needed */ 875 if (x && b) { 876 const PetscScalar *xx; 877 PetscScalar *bb; 878 879 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 880 PetscCall(VecGetArrayRead(x, &xx)); 881 PetscCall(VecGetArray(b, &bb)); 882 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 883 PetscCall(VecRestoreArrayRead(x, &xx)); 884 PetscCall(VecRestoreArray(b, &bb)); 885 } 886 887 if (diag != 0.0 && cong) { 888 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 889 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 890 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 891 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 892 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 893 PetscInt nnwA, nnwB; 894 PetscBool nnzA, nnzB; 895 896 nnwA = aijA->nonew; 897 nnwB = aijB->nonew; 898 nnzA = aijA->keepnonzeropattern; 899 nnzB = aijB->keepnonzeropattern; 900 if (!nnzA) { 901 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 902 aijA->nonew = 0; 903 } 904 if (!nnzB) { 905 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 906 aijB->nonew = 0; 907 } 908 /* Must zero here before the next loop */ 909 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 910 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 911 for (r = 0; r < len; ++r) { 912 const PetscInt row = lrows[r] + A->rmap->rstart; 913 if (row >= A->cmap->N) continue; 914 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 915 } 916 aijA->nonew = nnwA; 917 aijB->nonew = nnwB; 918 } else { 919 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 920 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 921 } 922 PetscCall(PetscFree(lrows)); 923 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 924 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 928 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 929 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 930 } 931 PetscFunctionReturn(PETSC_SUCCESS); 932 } 933 934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 935 { 936 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 937 PetscInt n = A->rmap->n; 938 PetscInt i, j, r, m, len = 0; 939 PetscInt *lrows, *owners = A->rmap->range; 940 PetscMPIInt p = 0; 941 PetscSFNode *rrows; 942 PetscSF sf; 943 const PetscScalar *xx; 944 PetscScalar *bb, *mask, *aij_a; 945 Vec xmask, lmask; 946 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 947 const PetscInt *aj, *ii, *ridx; 948 PetscScalar *aa; 949 950 PetscFunctionBegin; 951 /* Create SF where leaves are input rows and roots are owned rows */ 952 PetscCall(PetscMalloc1(n, &lrows)); 953 for (r = 0; r < n; ++r) lrows[r] = -1; 954 PetscCall(PetscMalloc1(N, &rrows)); 955 for (r = 0; r < N; ++r) { 956 
const PetscInt idx = rows[r]; 957 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 958 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 959 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 960 } 961 rrows[r].rank = p; 962 rrows[r].index = rows[r] - owners[p]; 963 } 964 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 965 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 966 /* Collect flags for rows to be zeroed */ 967 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 968 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 969 PetscCall(PetscSFDestroy(&sf)); 970 /* Compress and put in row numbers */ 971 for (r = 0; r < n; ++r) 972 if (lrows[r] >= 0) lrows[len++] = r; 973 /* zero diagonal part of matrix */ 974 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 975 /* handle off-diagonal part of matrix */ 976 PetscCall(MatCreateVecs(A, &xmask, NULL)); 977 PetscCall(VecDuplicate(l->lvec, &lmask)); 978 PetscCall(VecGetArray(xmask, &bb)); 979 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 980 PetscCall(VecRestoreArray(xmask, &bb)); 981 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 982 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 983 PetscCall(VecDestroy(&xmask)); 984 if (x && b) { /* this code is buggy when the row and column layout don't match */ 985 PetscBool cong; 986 987 PetscCall(MatHasCongruentLayouts(A, &cong)); 988 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 989 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 990 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 991 PetscCall(VecGetArrayRead(l->lvec, &xx)); 992 PetscCall(VecGetArray(b, &bb)); 993 } 994 PetscCall(VecGetArray(lmask, &mask)); 995 /* remove zeroed rows of off-diagonal matrix */ 996 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 997 ii = aij->i; 998 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 999 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1000 if (aij->compressedrow.use) { 1001 m = aij->compressedrow.nrows; 1002 ii = aij->compressedrow.i; 1003 ridx = aij->compressedrow.rindex; 1004 for (i = 0; i < m; i++) { 1005 n = ii[i + 1] - ii[i]; 1006 aj = aij->j + ii[i]; 1007 aa = aij_a + ii[i]; 1008 1009 for (j = 0; j < n; j++) { 1010 if (PetscAbsScalar(mask[*aj])) { 1011 if (b) bb[*ridx] -= *aa * xx[*aj]; 1012 *aa = 0.0; 1013 } 1014 aa++; 1015 aj++; 1016 } 1017 ridx++; 1018 } 1019 } else { /* do not use compressed row format */ 1020 m = l->B->rmap->n; 1021 for (i = 0; i < m; i++) { 1022 n = ii[i + 1] - ii[i]; 1023 aj = aij->j + ii[i]; 1024 aa = aij_a + ii[i]; 1025 for (j = 0; j < n; j++) { 1026 if (PetscAbsScalar(mask[*aj])) { 1027 if (b) bb[i] -= *aa * xx[*aj]; 1028 *aa = 0.0; 1029 } 1030 aa++; 1031 aj++; 1032 } 1033 } 1034 } 1035 if (x && b) { 1036 PetscCall(VecRestoreArray(b, &bb)); 1037 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1038 } 1039 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1040 PetscCall(VecRestoreArray(lmask, &mask)); 1041 PetscCall(VecDestroy(&lmask)); 1042 PetscCall(PetscFree(lrows)); 1043 1044 /* only change matrix 
nonzero state if pattern was allowed to be changed */ 1045 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1046 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1047 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1048 } 1049 PetscFunctionReturn(PETSC_SUCCESS); 1050 } 1051 1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1055 PetscInt nt; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 PetscCall(VecGetLocalSize(xx, &nt)); 1060 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1061 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1062 PetscUseTypeMethod(a->A, mult, xx, yy); 1063 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1071 1072 PetscFunctionBegin; 1073 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1074 PetscFunctionReturn(PETSC_SUCCESS); 1075 } 1076 1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1078 { 1079 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1080 VecScatter Mvctx = a->Mvctx; 1081 1082 PetscFunctionBegin; 1083 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1084 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1085 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1086 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1087 PetscFunctionReturn(PETSC_SUCCESS); 1088 } 1089 1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1091 { 1092 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1093 1094 PetscFunctionBegin; 1095 /* do nondiagonal part */ 1096 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1097 /* do local part */ 1098 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1099 /* add partial results together */ 1100 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1101 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1102 PetscFunctionReturn(PETSC_SUCCESS); 1103 } 1104 1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1106 { 1107 MPI_Comm comm; 1108 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1109 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1110 IS Me, Notme; 1111 PetscInt M, N, first, last, *notme, i; 1112 PetscBool lf; 1113 PetscMPIInt size; 1114 1115 PetscFunctionBegin; 1116 /* Easy test: symmetric diagonal block */ 1117 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1118 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1119 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1120 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1121 PetscCallMPI(MPI_Comm_size(comm, &size)); 1122 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1123 1124 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1125 PetscCall(MatGetSize(Amat, &M, &N)); 1126 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1127 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1128 for (i = 0; i < first; i++) notme[i] = i; 1129 for (i = last; i < M; i++) notme[i - last + first] = i; 1130 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1131 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1132 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1133 Aoff = Aoffs[0]; 1134 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1135 Boff = Boffs[0]; 1136 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1137 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1138 PetscCall(MatDestroyMatrices(1, &Boffs)); 1139 PetscCall(ISDestroy(&Me)); 1140 PetscCall(ISDestroy(&Notme)); 1141 PetscCall(PetscFree(notme)); 1142 PetscFunctionReturn(PETSC_SUCCESS); 1143 } 1144 1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1148 1149 PetscFunctionBegin; 1150 /* do nondiagonal part */ 1151 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1152 /* do local part */ 1153 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1154 /* add partial results together */ 1155 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1156 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1157 PetscFunctionReturn(PETSC_SUCCESS); 1158 } 1159 1160 /* 1161 This only works correctly for square matrices where the subblock A->A is the 1162 diagonal block 1163 */ 1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1165 { 1166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1167 1168 PetscFunctionBegin; 1169 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1170 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1171 PetscCall(MatGetDiagonal(a->A, v)); 1172 PetscFunctionReturn(PETSC_SUCCESS); 1173 } 1174 1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1176 { 1177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1178 1179 PetscFunctionBegin; 1180 PetscCall(MatScale(a->A, aa)); 1181 PetscCall(MatScale(a->B, aa)); 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1186 { 1187 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1188 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1189 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1190 const PetscInt *garray = aij->garray; 1191 const PetscScalar *aa, *ba; 1192 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1193 PetscInt64 nz, hnz; 1194 PetscInt *rowlens; 1195 PetscInt *colidxs; 1196 PetscScalar *matvals; 1197 PetscMPIInt rank; 1198 1199 PetscFunctionBegin; 1200 PetscCall(PetscViewerSetUp(viewer)); 1201 1202 M = mat->rmap->N; 1203 N = mat->cmap->N; 1204 m = mat->rmap->n; 1205 rs = mat->rmap->rstart; 1206 cs = mat->cmap->rstart; 1207 nz = A->nz + B->nz; 1208 1209 /* write matrix header */ 1210 header[0] = MAT_FILE_CLASSID; 1211 header[1] = M; 1212 header[2] = N; 1213 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1214 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1215 if (rank == 0) 
PetscCall(PetscIntCast(hnz, &header[3])); 1216 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1217 1218 /* fill in and store row lengths */ 1219 PetscCall(PetscMalloc1(m, &rowlens)); 1220 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1221 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1222 PetscCall(PetscFree(rowlens)); 1223 1224 /* fill in and store column indices */ 1225 PetscCall(PetscMalloc1(nz, &colidxs)); 1226 for (cnt = 0, i = 0; i < m; i++) { 1227 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1228 if (garray[B->j[jb]] > cs) break; 1229 colidxs[cnt++] = garray[B->j[jb]]; 1230 } 1231 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1232 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1233 } 1234 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1235 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1236 PetscCall(PetscFree(colidxs)); 1237 1238 /* fill in and store nonzero values */ 1239 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1240 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1241 PetscCall(PetscMalloc1(nz, &matvals)); 1242 for (cnt = 0, i = 0; i < m; i++) { 1243 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 matvals[cnt++] = ba[jb]; 1246 } 1247 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1248 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1249 } 1250 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1251 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1252 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1254 PetscCall(PetscFree(matvals)); 1255 1256 /* write block size option to the viewer's .info file */ 1257 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 1261 #include <petscdraw.h> 1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1263 { 1264 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1265 PetscMPIInt rank = aij->rank, size = aij->size; 1266 PetscBool isdraw, iascii, isbinary; 1267 PetscViewer sviewer; 1268 PetscViewerFormat format; 1269 1270 PetscFunctionBegin; 1271 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1272 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1273 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1274 if (iascii) { 1275 PetscCall(PetscViewerGetFormat(viewer, &format)); 1276 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1277 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1278 PetscCall(PetscMalloc1(size, &nz)); 1279 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1280 for (i = 0; i < size; i++) { 1281 nmax = PetscMax(nmax, nz[i]); 1282 nmin = PetscMin(nmin, nz[i]); 1283 navg += nz[i]; 1284 } 1285 PetscCall(PetscFree(nz)); 1286 navg = navg / size; 1287 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" 
PetscInt_FMT "\n", nmin, navg, nmax)); 1288 PetscFunctionReturn(PETSC_SUCCESS); 1289 } 1290 PetscCall(PetscViewerGetFormat(viewer, &format)); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscInt *inodes = NULL; 1294 1295 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1296 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1297 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1298 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1299 if (!inodes) { 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1301 info.memory)); 1302 } else { 1303 PetscCall( 1304 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1305 } 1306 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1307 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1308 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1310 PetscCall(PetscViewerFlush(viewer)); 1311 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1312 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1313 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1314 PetscFunctionReturn(PETSC_SUCCESS); 1315 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1316 PetscInt inodecount, inodelimit, *inodes; 1317 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1318 if (inodes) { 1319 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1320 } else { 1321 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1322 } 1323 PetscFunctionReturn(PETSC_SUCCESS); 1324 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 } else if (isbinary) { 1328 if (size == 1) { 1329 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1330 PetscCall(MatView(aij->A, viewer)); 1331 } else { 1332 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1333 } 1334 PetscFunctionReturn(PETSC_SUCCESS); 1335 } else if (iascii && size == 1) { 1336 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1337 PetscCall(MatView(aij->A, viewer)); 1338 PetscFunctionReturn(PETSC_SUCCESS); 1339 } else if (isdraw) { 1340 PetscDraw draw; 1341 PetscBool isnull; 1342 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1343 PetscCall(PetscDrawIsNull(draw, &isnull)); 1344 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1345 } 1346 1347 { /* assemble the entire matrix onto first processor */ 1348 Mat A = NULL, Av; 1349 IS isrow, iscol; 1350 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1352 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1353 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1354 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1355 /* The commented code uses MatCreateSubMatrices instead */ 1356 /* 1357 Mat *AA, A = NULL, Av; 1358 IS isrow,iscol; 1359 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1361 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1362 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1363 if (rank == 0) { 1364 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1365 A = AA[0]; 1366 Av = AA[0]; 1367 } 1368 PetscCall(MatDestroySubMatrices(1,&AA)); 1369 */ 1370 PetscCall(ISDestroy(&iscol)); 1371 PetscCall(ISDestroy(&isrow)); 1372 /* 1373 Everyone has to call to draw the matrix since the graphics waits are 1374 synchronized across all processors that share the PetscDraw object 1375 */ 1376 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1377 if (rank == 0) { 1378 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1379 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1380 } 1381 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1382 PetscCall(MatDestroy(&A)); 1383 } 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1388 { 1389 PetscBool iascii, isdraw, issocket, isbinary; 1390 1391 PetscFunctionBegin; 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1395 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1396 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1397 PetscFunctionReturn(PETSC_SUCCESS); 1398 } 1399 1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1401 { 1402 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1403 Vec bb1 = NULL; 1404 PetscBool hasop; 1405 1406 PetscFunctionBegin; 1407 if (flag == SOR_APPLY_UPPER) { 1408 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1409 PetscFunctionReturn(PETSC_SUCCESS); 1410 } 1411 1412 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1413 1414 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 1420 while (its--) { 1421 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1423 1424 /* update rhs: bb1 = bb - B*x */ 1425 PetscCall(VecScale(mat->lvec, -1.0)); 1426 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1427 1428 /* local sweep */ 1429 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1430 } 1431 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1432 if (flag & SOR_ZERO_INITIAL_GUESS) { 1433 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1434 its--; 1435 } 1436 while (its--) { 1437 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1439 1440 /* update rhs: bb1 = bb - B*x */ 1441 PetscCall(VecScale(mat->lvec, -1.0)); 1442 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1443 1444 /* local sweep */ 1445 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1446 } 1447 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1448 if (flag & SOR_ZERO_INITIAL_GUESS) { 1449 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1450 its--; 1451 } 1452 while (its--) { 1453 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1455 1456 /* update rhs: bb1 = bb - B*x */ 1457 PetscCall(VecScale(mat->lvec, -1.0)); 1458 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1459 1460 /* local sweep */ 1461 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1462 } 1463 } else if (flag & SOR_EISENSTAT) { 1464 Vec xx1; 1465 1466 PetscCall(VecDuplicate(bb, &xx1)); 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1468 1469 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1470 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1471 if (!mat->diag) { 1472 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1473 PetscCall(MatGetDiagonal(matin, mat->diag)); 1474 } 1475 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1476 if (hasop) { 1477 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1478 } else { 1479 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1480 } 1481 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1482 1483 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1484 1485 /* local sweep */ 1486 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1487 PetscCall(VecAXPY(xx, 1.0, xx1)); 1488 PetscCall(VecDestroy(&xx1)); 1489 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1490 1491 PetscCall(VecDestroy(&bb1)); 1492 1493 matin->factorerrortype = mat->A->factorerrortype; 1494 PetscFunctionReturn(PETSC_SUCCESS); 1495 } 1496 1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1498 { 1499 Mat aA, aB, Aperm; 1500 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1501 PetscScalar *aa, *ba; 1502 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1503 PetscSF rowsf, sf; 1504 IS parcolp = NULL; 1505 PetscBool done; 1506 1507 PetscFunctionBegin; 1508 PetscCall(MatGetLocalSize(A, &m, &n)); 1509 PetscCall(ISGetIndices(rowp, &rwant)); 1510 PetscCall(ISGetIndices(colp, &cwant)); 1511 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1512 1513 /* Invert row permutation to find out where my rows should go */ 1514 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1515 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1516 PetscCall(PetscSFSetFromOptions(rowsf)); 1517 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1518 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1519 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1520 1521 /* Invert column permutation to find out where my columns should go */ 1522 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1523 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1524 PetscCall(PetscSFSetFromOptions(sf)); 1525 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1526 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1527 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1528 PetscCall(PetscSFDestroy(&sf)); 1529 1530 PetscCall(ISRestoreIndices(rowp, &rwant)); 1531 PetscCall(ISRestoreIndices(colp, &cwant)); 1532 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1533 1534 /* Find out where my gcols should go */ 1535 PetscCall(MatGetSize(aB, NULL, &ng)); 1536 PetscCall(PetscMalloc1(ng, &gcdest)); 1537 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1538 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1539 PetscCall(PetscSFSetFromOptions(sf)); 1540 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1541 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1542 PetscCall(PetscSFDestroy(&sf)); 1543 1544 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1545 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1546 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1547 for (i = 0; i < m; i++) { 1548 PetscInt row = rdest[i]; 1549 PetscMPIInt rowner; 1550 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1551 for (j = ai[i]; j < ai[i + 1]; j++) { 1552 PetscInt col = cdest[aj[j]]; 1553 PetscMPIInt cowner; 1554 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1555 if (rowner == cowner) dnnz[i]++; 1556 else onnz[i]++; 1557 } 1558 for (j = bi[i]; j < bi[i + 1]; j++) { 1559 PetscInt col = gcdest[bj[j]]; 1560 PetscMPIInt cowner; 1561 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1562 if (rowner == cowner) dnnz[i]++; 1563 else onnz[i]++; 1564 } 1565 } 1566 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1568 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1569 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1570 PetscCall(PetscSFDestroy(&rowsf)); 1571 1572 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1573 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1574 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1575 for (i = 0; i < m; i++) { 1576 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1577 PetscInt j0, rowlen; 1578 rowlen = ai[i + 1] - ai[i]; 1579 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1580 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1581 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1582 } 1583 rowlen = bi[i + 1] - bi[i]; 1584 for (j0 = j = 0; j < rowlen; j0 = j) { 1585 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1586 
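/* insert this batch of globally renumbered off-diagonal columns and their values into the permuted matrix */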
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1587 } 1588 } 1589 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1591 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1592 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1593 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1594 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1595 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1596 PetscCall(PetscFree3(work, rdest, cdest)); 1597 PetscCall(PetscFree(gcdest)); 1598 if (parcolp) PetscCall(ISDestroy(&colp)); 1599 *B = Aperm; 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1604 { 1605 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1606 1607 PetscFunctionBegin; 1608 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1609 if (ghosts) *ghosts = aij->garray; 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1614 { 1615 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1616 Mat A = mat->A, B = mat->B; 1617 PetscLogDouble isend[5], irecv[5]; 1618 1619 PetscFunctionBegin; 1620 info->block_size = 1.0; 1621 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1622 1623 isend[0] = info->nz_used; 1624 isend[1] = info->nz_allocated; 1625 isend[2] = info->nz_unneeded; 1626 isend[3] = info->memory; 1627 isend[4] = info->mallocs; 1628 1629 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1630 1631 isend[0] += info->nz_used; 1632 isend[1] += info->nz_allocated; 1633 isend[2] += info->nz_unneeded; 1634 isend[3] += info->memory; 1635 isend[4] += info->mallocs; 1636 if (flag == MAT_LOCAL) { 1637 info->nz_used = isend[0]; 1638 info->nz_allocated = isend[1]; 1639 info->nz_unneeded = isend[2]; 1640 info->memory = isend[3]; 1641 info->mallocs = isend[4]; 1642 } else if (flag == MAT_GLOBAL_MAX) { 1643 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1644 1645 info->nz_used = irecv[0]; 1646 info->nz_allocated = irecv[1]; 1647 info->nz_unneeded = irecv[2]; 1648 info->memory = irecv[3]; 1649 info->mallocs = irecv[4]; 1650 } else if (flag == MAT_GLOBAL_SUM) { 1651 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1652 1653 info->nz_used = irecv[0]; 1654 info->nz_allocated = irecv[1]; 1655 info->nz_unneeded = irecv[2]; 1656 info->memory = irecv[3]; 1657 info->mallocs = irecv[4]; 1658 } 1659 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1660 info->fill_ratio_needed = 0; 1661 info->factor_mallocs = 0; 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1666 { 1667 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1668 1669 PetscFunctionBegin; 1670 switch (op) { 1671 case MAT_NEW_NONZERO_LOCATIONS: 1672 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1673 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1674 case MAT_KEEP_NONZERO_PATTERN: 1675 case MAT_NEW_NONZERO_LOCATION_ERR: 1676 case MAT_USE_INODES: 1677 case MAT_IGNORE_ZERO_ENTRIES: 1678 case MAT_FORM_EXPLICIT_TRANSPOSE: 1679 MatCheckPreallocated(A, 1); 1680 PetscCall(MatSetOption(a->A, op, flg)); 1681 PetscCall(MatSetOption(a->B, op, flg)); 1682 break; 1683 case MAT_ROW_ORIENTED: 1684 MatCheckPreallocated(A, 1); 1685 a->roworiented = flg; 1686 1687 
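/* propagate the orientation setting to both the diagonal (A) and off-diagonal (B) blocks */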
PetscCall(MatSetOption(a->A, op, flg)); 1688 PetscCall(MatSetOption(a->B, op, flg)); 1689 break; 1690 case MAT_IGNORE_OFF_PROC_ENTRIES: 1691 a->donotstash = flg; 1692 break; 1693 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1694 case MAT_SPD: 1695 case MAT_SYMMETRIC: 1696 case MAT_STRUCTURALLY_SYMMETRIC: 1697 case MAT_HERMITIAN: 1698 case MAT_SYMMETRY_ETERNAL: 1699 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1700 case MAT_SPD_ETERNAL: 1701 /* if the diagonal matrix is square it inherits some of the properties above */ 1702 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1703 break; 1704 case MAT_SUBMAT_SINGLEIS: 1705 A->submat_singleis = flg; 1706 break; 1707 default: 1708 break; 1709 } 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1714 { 1715 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1716 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1717 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1718 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1719 PetscInt *cmap, *idx_p; 1720 1721 PetscFunctionBegin; 1722 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1723 mat->getrowactive = PETSC_TRUE; 1724 1725 if (!mat->rowvalues && (idx || v)) { 1726 /* 1727 allocate enough space to hold information from the longest row. 1728 */ 1729 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1730 PetscInt max = 1, tmp; 1731 for (i = 0; i < matin->rmap->n; i++) { 1732 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1733 if (max < tmp) max = tmp; 1734 } 1735 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1736 } 1737 1738 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1739 lrow = row - rstart; 1740 1741 pvA = &vworkA; 1742 pcA = &cworkA; 1743 pvB = &vworkB; 1744 pcB = &cworkB; 1745 if (!v) { 1746 pvA = NULL; 1747 pvB = NULL; 1748 } 1749 if (!idx) { 1750 pcA = NULL; 1751 if (!v) pcB = NULL; 1752 } 1753 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1754 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1755 nztot = nzA + nzB; 1756 1757 cmap = mat->garray; 1758 if (v || idx) { 1759 if (nztot) { 1760 /* Sort by increasing column numbers, assuming A and B already sorted */ 1761 PetscInt imark = -1; 1762 if (v) { 1763 *v = v_p = mat->rowvalues; 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1766 else break; 1767 } 1768 imark = i; 1769 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1770 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1771 } 1772 if (idx) { 1773 *idx = idx_p = mat->rowindices; 1774 if (imark > -1) { 1775 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1776 } else { 1777 for (i = 0; i < nzB; i++) { 1778 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1779 else break; 1780 } 1781 imark = i; 1782 } 1783 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1784 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1785 } 1786 } else { 1787 if (idx) *idx = NULL; 1788 if (v) *v = NULL; 1789 } 1790 } 1791 *nz = nztot; 1792 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1793 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1794 
PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 1801 PetscFunctionBegin; 1802 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1803 aij->getrowactive = PETSC_FALSE; 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1808 { 1809 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1810 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1811 PetscInt i, j, cstart = mat->cmap->rstart; 1812 PetscReal sum = 0.0; 1813 const MatScalar *v, *amata, *bmata; 1814 1815 PetscFunctionBegin; 1816 if (aij->size == 1) { 1817 PetscCall(MatNorm(aij->A, type, norm)); 1818 } else { 1819 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1820 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1821 if (type == NORM_FROBENIUS) { 1822 v = amata; 1823 for (i = 0; i < amat->nz; i++) { 1824 sum += PetscRealPart(PetscConj(*v) * (*v)); 1825 v++; 1826 } 1827 v = bmata; 1828 for (i = 0; i < bmat->nz; i++) { 1829 sum += PetscRealPart(PetscConj(*v) * (*v)); 1830 v++; 1831 } 1832 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1833 *norm = PetscSqrtReal(*norm); 1834 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1835 } else if (type == NORM_1) { /* max column norm */ 1836 PetscReal *tmp; 1837 PetscInt *jj, *garray = aij->garray; 1838 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1839 *norm = 0.0; 1840 v = amata; 1841 jj = amat->j; 1842 for (j = 0; j < amat->nz; j++) { 1843 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1844 v++; 1845 } 1846 v = bmata; 1847 jj = bmat->j; 1848 for (j = 0; j < bmat->nz; j++) { 1849 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1850 v++; 1851 } 1852 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1853 for (j = 0; j < mat->cmap->N; j++) { 1854 if (tmp[j] > *norm) *norm = tmp[j]; 1855 } 1856 PetscCall(PetscFree(tmp)); 1857 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1858 } else if (type == NORM_INFINITY) { /* max row norm */ 1859 PetscReal ntemp = 0.0; 1860 for (j = 0; j < aij->A->rmap->n; j++) { 1861 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1862 sum = 0.0; 1863 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1864 sum += PetscAbsScalar(*v); 1865 v++; 1866 } 1867 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1868 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1869 sum += PetscAbsScalar(*v); 1870 v++; 1871 } 1872 if (sum > ntemp) ntemp = sum; 1873 } 1874 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1875 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1876 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1877 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1878 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1879 } 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1884 { 1885 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1886 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1887 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, 
*cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1888 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1889 Mat B, A_diag, *B_diag; 1890 const MatScalar *pbv, *bv; 1891 1892 PetscFunctionBegin; 1893 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1894 ma = A->rmap->n; 1895 na = A->cmap->n; 1896 mb = a->B->rmap->n; 1897 nb = a->B->cmap->n; 1898 ai = Aloc->i; 1899 aj = Aloc->j; 1900 bi = Bloc->i; 1901 bj = Bloc->j; 1902 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1903 PetscInt *d_nnz, *g_nnz, *o_nnz; 1904 PetscSFNode *oloc; 1905 PETSC_UNUSED PetscSF sf; 1906 1907 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1908 /* compute d_nnz for preallocation */ 1909 PetscCall(PetscArrayzero(d_nnz, na)); 1910 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1911 /* compute local off-diagonal contributions */ 1912 PetscCall(PetscArrayzero(g_nnz, nb)); 1913 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1914 /* map those to global */ 1915 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1916 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1917 PetscCall(PetscSFSetFromOptions(sf)); 1918 PetscCall(PetscArrayzero(o_nnz, na)); 1919 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1920 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1921 PetscCall(PetscSFDestroy(&sf)); 1922 1923 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1924 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1925 PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs)); 1926 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1927 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1928 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1929 } else { 1930 B = *matout; 1931 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1932 } 1933 1934 b = (Mat_MPIAIJ *)B->data; 1935 A_diag = a->A; 1936 B_diag = &b->A; 1937 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1938 A_diag_ncol = A_diag->cmap->N; 1939 B_diag_ilen = sub_B_diag->ilen; 1940 B_diag_i = sub_B_diag->i; 1941 1942 /* Set ilen for diagonal of B */ 1943 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1944 1945 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1946 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1947 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1948 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1949 1950 /* copy over the B part */ 1951 PetscCall(PetscMalloc1(bi[mb], &cols)); 1952 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1953 pbv = bv; 1954 row = A->rmap->rstart; 1955 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1956 cols_tmp = cols; 1957 for (i = 0; i < mb; i++) { 1958 ncol = bi[i + 1] - bi[i]; 1959 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1960 row++; 1961 if (pbv) pbv += ncol; 1962 if (cols_tmp) cols_tmp += ncol; 1963 } 1964 PetscCall(PetscFree(cols)); 1965 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1966 1967 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1968 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1969 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1970 *matout = B; 1971 } else { 1972 PetscCall(MatHeaderMerge(A, &B)); 1973 } 1974 PetscFunctionReturn(PETSC_SUCCESS); 1975 } 1976 1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1978 { 1979 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1980 Mat a = aij->A, b = aij->B; 1981 PetscInt s1, s2, s3; 1982 1983 PetscFunctionBegin; 1984 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1985 if (rr) { 1986 PetscCall(VecGetLocalSize(rr, &s1)); 1987 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1988 /* Overlap communication with computation. */ 1989 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1990 } 1991 if (ll) { 1992 PetscCall(VecGetLocalSize(ll, &s1)); 1993 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1994 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1995 } 1996 /* scale the diagonal block */ 1997 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1998 1999 if (rr) { 2000 /* Do a scatter end and then right scale the off-diagonal block */ 2001 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2002 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2003 } 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2010 2011 PetscFunctionBegin; 2012 PetscCall(MatSetUnfactored(a->A)); 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2017 { 2018 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2019 Mat a, b, c, d; 2020 PetscBool flg; 2021 2022 PetscFunctionBegin; 2023 a = matA->A; 2024 b = matA->B; 2025 c = matB->A; 2026 d = matB->B; 2027 2028 PetscCall(MatEqual(a, c, &flg)); 2029 if (flg) PetscCall(MatEqual(b, d, &flg)); 2030 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2035 { 2036 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2037 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2038 2039 PetscFunctionBegin; 2040 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2041 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2042 /* because of the column compression in the off-processor part of the matrix a->B, 2043 the number of columns in a->B and b->B may be different, hence we cannot call 2044 the MatCopy() directly on the two parts. If need be, we can provide a more 2045 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2046 then copying the submatrices */ 2047 PetscCall(MatCopy_Basic(A, B, str)); 2048 } else { 2049 PetscCall(MatCopy(a->A, b->A, str)); 2050 PetscCall(MatCopy(a->B, b->B, str)); 2051 } 2052 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2053 PetscFunctionReturn(PETSC_SUCCESS); 2054 } 2055 2056 /* 2057 Computes the number of nonzeros per row needed for preallocation when X and Y 2058 have different nonzero structure. 2059 */ 2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2061 { 2062 PetscInt i, j, k, nzx, nzy; 2063 2064 PetscFunctionBegin; 2065 /* Set the number of nonzeros in the new matrix */ 2066 for (i = 0; i < m; i++) { 2067 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2068 nzx = xi[i + 1] - xi[i]; 2069 nzy = yi[i + 1] - yi[i]; 2070 nnz[i] = 0; 2071 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2072 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2073 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2074 nnz[i]++; 2075 } 2076 for (; k < nzy; k++) nnz[i]++; 2077 } 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2083 { 2084 PetscInt m = Y->rmap->N; 2085 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2086 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2087 2088 PetscFunctionBegin; 2089 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2094 { 2095 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2096 2097 PetscFunctionBegin; 2098 if (str == SAME_NONZERO_PATTERN) { 2099 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2100 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2101 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2102 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2103 } else { 2104 Mat B; 2105 PetscInt *nnz_d, *nnz_o; 2106 2107 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2108 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2109 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2110 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2111 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2112 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2113 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2114 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2115 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2116 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2117 PetscCall(MatHeaderMerge(Y, &B)); 2118 PetscCall(PetscFree(nnz_d)); 
2119 PetscCall(PetscFree(nnz_o)); 2120 } 2121 PetscFunctionReturn(PETSC_SUCCESS); 2122 } 2123 2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2125 2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2127 { 2128 PetscFunctionBegin; 2129 if (PetscDefined(USE_COMPLEX)) { 2130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2131 2132 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2133 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2134 } 2135 PetscFunctionReturn(PETSC_SUCCESS); 2136 } 2137 2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2139 { 2140 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2141 2142 PetscFunctionBegin; 2143 PetscCall(MatRealPart(a->A)); 2144 PetscCall(MatRealPart(a->B)); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2149 { 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2151 2152 PetscFunctionBegin; 2153 PetscCall(MatImaginaryPart(a->A)); 2154 PetscCall(MatImaginaryPart(a->B)); 2155 PetscFunctionReturn(PETSC_SUCCESS); 2156 } 2157 2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2159 { 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2161 PetscInt i, *idxb = NULL, m = A->rmap->n; 2162 PetscScalar *vv; 2163 Vec vB, vA; 2164 const PetscScalar *va, *vb; 2165 2166 PetscFunctionBegin; 2167 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2168 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2169 2170 PetscCall(VecGetArrayRead(vA, &va)); 2171 if (idx) { 2172 for (i = 0; i < m; i++) { 2173 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2174 } 2175 } 2176 2177 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2178 PetscCall(PetscMalloc1(m, &idxb)); 2179 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2180 2181 PetscCall(VecGetArrayWrite(v, &vv)); 2182 PetscCall(VecGetArrayRead(vB, &vb)); 2183 for (i = 0; i < m; i++) { 2184 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2185 vv[i] = vb[i]; 2186 if (idx) idx[i] = a->garray[idxb[i]]; 2187 } else { 2188 vv[i] = va[i]; 2189 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2190 } 2191 } 2192 PetscCall(VecRestoreArrayWrite(v, &vv)); 2193 PetscCall(VecRestoreArrayRead(vA, &va)); 2194 PetscCall(VecRestoreArrayRead(vB, &vb)); 2195 PetscCall(PetscFree(idxb)); 2196 PetscCall(VecDestroy(&vA)); 2197 PetscCall(VecDestroy(&vB)); 2198 PetscFunctionReturn(PETSC_SUCCESS); 2199 } 2200 2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2204 Vec vB, vA; 2205 2206 PetscFunctionBegin; 2207 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2208 PetscCall(MatGetRowSumAbs(a->A, vA)); 2209 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2210 PetscCall(MatGetRowSumAbs(a->B, vB)); 2211 PetscCall(VecAXPY(vA, 1.0, vB)); 2212 PetscCall(VecDestroy(&vB)); 2213 PetscCall(VecCopy(vA, v)); 2214 PetscCall(VecDestroy(&vA)); 2215 PetscFunctionReturn(PETSC_SUCCESS); 2216 } 2217 2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2219 { 2220 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2221 PetscInt m = A->rmap->n, n = A->cmap->n; 2222 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2223 PetscInt *cmap = mat->garray; 2224 PetscInt *diagIdx, *offdiagIdx; 2225 Vec diagV, offdiagV; 2226 PetscScalar *a, *diagA, *offdiagA; 2227 const PetscScalar *ba, *bav; 2228 PetscInt r, j, col, ncols, *bi, *bj; 2229 Mat B = mat->B; 2230 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2231 2232 PetscFunctionBegin; 2233 /* When a process holds entire A and other 
processes have no entry */ 2234 if (A->cmap->N == n) { 2235 PetscCall(VecGetArrayWrite(v, &diagA)); 2236 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2237 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2238 PetscCall(VecDestroy(&diagV)); 2239 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } else if (n == 0) { 2242 if (m) { 2243 PetscCall(VecGetArrayWrite(v, &a)); 2244 for (r = 0; r < m; r++) { 2245 a[r] = 0.0; 2246 if (idx) idx[r] = -1; 2247 } 2248 PetscCall(VecRestoreArrayWrite(v, &a)); 2249 } 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } 2252 2253 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2255 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2256 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2257 2258 /* Get offdiagIdx[] for implicit 0.0 */ 2259 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2260 ba = bav; 2261 bi = b->i; 2262 bj = b->j; 2263 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2264 for (r = 0; r < m; r++) { 2265 ncols = bi[r + 1] - bi[r]; 2266 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2267 offdiagA[r] = *ba; 2268 offdiagIdx[r] = cmap[0]; 2269 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2270 offdiagA[r] = 0.0; 2271 2272 /* Find first hole in the cmap */ 2273 for (j = 0; j < ncols; j++) { 2274 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2275 if (col > j && j < cstart) { 2276 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2277 break; 2278 } else if (col > j + n && j >= cstart) { 2279 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2280 break; 2281 } 2282 } 2283 if (j == ncols && ncols < A->cmap->N - n) { 2284 /* a hole is outside compressed Bcols */ 2285 if (ncols == 0) { 2286 if (cstart) { 2287 offdiagIdx[r] = 0; 2288 } else offdiagIdx[r] = cend; 2289 } else { /* ncols > 0 */ 2290 offdiagIdx[r] = cmap[ncols - 1] + 1; 2291 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2292 } 2293 } 2294 } 2295 2296 for (j = 0; j < ncols; j++) { 2297 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2298 offdiagA[r] = *ba; 2299 offdiagIdx[r] = cmap[*bj]; 2300 } 2301 ba++; 2302 bj++; 2303 } 2304 } 2305 2306 PetscCall(VecGetArrayWrite(v, &a)); 2307 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2308 for (r = 0; r < m; ++r) { 2309 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2310 a[r] = diagA[r]; 2311 if (idx) idx[r] = cstart + diagIdx[r]; 2312 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2313 a[r] = diagA[r]; 2314 if (idx) { 2315 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2316 idx[r] = cstart + diagIdx[r]; 2317 } else idx[r] = offdiagIdx[r]; 2318 } 2319 } else { 2320 a[r] = offdiagA[r]; 2321 if (idx) idx[r] = offdiagIdx[r]; 2322 } 2323 } 2324 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2325 PetscCall(VecRestoreArrayWrite(v, &a)); 2326 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2327 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2328 PetscCall(VecDestroy(&diagV)); 2329 PetscCall(VecDestroy(&offdiagV)); 2330 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2331 PetscFunctionReturn(PETSC_SUCCESS); 2332 } 2333 2334 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2335 { 2336 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2337 PetscInt m = A->rmap->n, n = A->cmap->n; 2338 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 
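/* cmap (the garray) translates local column indices of the off-diagonal block B into global column numbers */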
2339 PetscInt *cmap = mat->garray; 2340 PetscInt *diagIdx, *offdiagIdx; 2341 Vec diagV, offdiagV; 2342 PetscScalar *a, *diagA, *offdiagA; 2343 const PetscScalar *ba, *bav; 2344 PetscInt r, j, col, ncols, *bi, *bj; 2345 Mat B = mat->B; 2346 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2347 2348 PetscFunctionBegin; 2349 /* When a process holds entire A and other processes have no entry */ 2350 if (A->cmap->N == n) { 2351 PetscCall(VecGetArrayWrite(v, &diagA)); 2352 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2353 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2354 PetscCall(VecDestroy(&diagV)); 2355 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2356 PetscFunctionReturn(PETSC_SUCCESS); 2357 } else if (n == 0) { 2358 if (m) { 2359 PetscCall(VecGetArrayWrite(v, &a)); 2360 for (r = 0; r < m; r++) { 2361 a[r] = PETSC_MAX_REAL; 2362 if (idx) idx[r] = -1; 2363 } 2364 PetscCall(VecRestoreArrayWrite(v, &a)); 2365 } 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } 2368 2369 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2370 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2371 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2372 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2373 2374 /* Get offdiagIdx[] for implicit 0.0 */ 2375 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2376 ba = bav; 2377 bi = b->i; 2378 bj = b->j; 2379 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2380 for (r = 0; r < m; r++) { 2381 ncols = bi[r + 1] - bi[r]; 2382 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2383 offdiagA[r] = *ba; 2384 offdiagIdx[r] = cmap[0]; 2385 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2386 offdiagA[r] = 0.0; 2387 2388 /* Find first hole in the cmap */ 2389 for (j = 0; j < ncols; j++) { 2390 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2391 if (col > j && j < cstart) { 2392 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2393 break; 2394 } else if (col > j + n && j >= cstart) { 2395 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2396 break; 2397 } 2398 } 2399 if (j == ncols && ncols < A->cmap->N - n) { 2400 /* a hole is outside compressed Bcols */ 2401 if (ncols == 0) { 2402 if (cstart) { 2403 offdiagIdx[r] = 0; 2404 } else offdiagIdx[r] = cend; 2405 } else { /* ncols > 0 */ 2406 offdiagIdx[r] = cmap[ncols - 1] + 1; 2407 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2408 } 2409 } 2410 } 2411 2412 for (j = 0; j < ncols; j++) { 2413 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2414 offdiagA[r] = *ba; 2415 offdiagIdx[r] = cmap[*bj]; 2416 } 2417 ba++; 2418 bj++; 2419 } 2420 } 2421 2422 PetscCall(VecGetArrayWrite(v, &a)); 2423 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2424 for (r = 0; r < m; ++r) { 2425 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2426 a[r] = diagA[r]; 2427 if (idx) idx[r] = cstart + diagIdx[r]; 2428 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2429 a[r] = diagA[r]; 2430 if (idx) { 2431 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2432 idx[r] = cstart + diagIdx[r]; 2433 } else idx[r] = offdiagIdx[r]; 2434 } 2435 } else { 2436 a[r] = offdiagA[r]; 2437 if (idx) idx[r] = offdiagIdx[r]; 2438 } 2439 } 2440 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2441 PetscCall(VecRestoreArrayWrite(v, &a)); 2442 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2443 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2444 PetscCall(VecDestroy(&diagV)); 2445 
PetscCall(VecDestroy(&offdiagV)); 2446 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2447 PetscFunctionReturn(PETSC_SUCCESS); 2448 } 2449 2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2451 { 2452 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2453 PetscInt m = A->rmap->n, n = A->cmap->n; 2454 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2455 PetscInt *cmap = mat->garray; 2456 PetscInt *diagIdx, *offdiagIdx; 2457 Vec diagV, offdiagV; 2458 PetscScalar *a, *diagA, *offdiagA; 2459 const PetscScalar *ba, *bav; 2460 PetscInt r, j, col, ncols, *bi, *bj; 2461 Mat B = mat->B; 2462 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2463 2464 PetscFunctionBegin; 2465 /* When a process holds entire A and other processes have no entry */ 2466 if (A->cmap->N == n) { 2467 PetscCall(VecGetArrayWrite(v, &diagA)); 2468 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2469 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2470 PetscCall(VecDestroy(&diagV)); 2471 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2472 PetscFunctionReturn(PETSC_SUCCESS); 2473 } else if (n == 0) { 2474 if (m) { 2475 PetscCall(VecGetArrayWrite(v, &a)); 2476 for (r = 0; r < m; r++) { 2477 a[r] = PETSC_MIN_REAL; 2478 if (idx) idx[r] = -1; 2479 } 2480 PetscCall(VecRestoreArrayWrite(v, &a)); 2481 } 2482 PetscFunctionReturn(PETSC_SUCCESS); 2483 } 2484 2485 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2486 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2487 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2488 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2489 2490 /* Get offdiagIdx[] for implicit 0.0 */ 2491 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2492 ba = bav; 2493 bi = b->i; 2494 bj = b->j; 2495 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2496 for (r = 0; r < m; r++) { 2497 ncols = bi[r + 1] - bi[r]; 2498 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2499 offdiagA[r] = *ba; 2500 offdiagIdx[r] = cmap[0]; 2501 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2502 offdiagA[r] = 0.0; 2503 2504 /* Find first hole in the cmap */ 2505 for (j = 0; j < ncols; j++) { 2506 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2507 if (col > j && j < cstart) { 2508 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2509 break; 2510 } else if (col > j + n && j >= cstart) { 2511 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2512 break; 2513 } 2514 } 2515 if (j == ncols && ncols < A->cmap->N - n) { 2516 /* a hole is outside compressed Bcols */ 2517 if (ncols == 0) { 2518 if (cstart) { 2519 offdiagIdx[r] = 0; 2520 } else offdiagIdx[r] = cend; 2521 } else { /* ncols > 0 */ 2522 offdiagIdx[r] = cmap[ncols - 1] + 1; 2523 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2524 } 2525 } 2526 } 2527 2528 for (j = 0; j < ncols; j++) { 2529 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2530 offdiagA[r] = *ba; 2531 offdiagIdx[r] = cmap[*bj]; 2532 } 2533 ba++; 2534 bj++; 2535 } 2536 } 2537 2538 PetscCall(VecGetArrayWrite(v, &a)); 2539 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2540 for (r = 0; r < m; ++r) { 2541 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2542 a[r] = diagA[r]; 2543 if (idx) idx[r] = cstart + diagIdx[r]; 2544 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2545 a[r] = diagA[r]; 2546 if (idx) { 2547 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2548 idx[r] = cstart + diagIdx[r]; 2549 } else idx[r] = offdiagIdx[r]; 2550 } 2551 } 
else { 2552 a[r] = offdiagA[r]; 2553 if (idx) idx[r] = offdiagIdx[r]; 2554 } 2555 } 2556 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2557 PetscCall(VecRestoreArrayWrite(v, &a)); 2558 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2559 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2560 PetscCall(VecDestroy(&diagV)); 2561 PetscCall(VecDestroy(&offdiagV)); 2562 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2563 PetscFunctionReturn(PETSC_SUCCESS); 2564 } 2565 2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2567 { 2568 Mat *dummy; 2569 2570 PetscFunctionBegin; 2571 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2572 *newmat = *dummy; 2573 PetscCall(PetscFree(dummy)); 2574 PetscFunctionReturn(PETSC_SUCCESS); 2575 } 2576 2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2578 { 2579 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2580 2581 PetscFunctionBegin; 2582 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2583 A->factorerrortype = a->A->factorerrortype; 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2588 { 2589 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2590 2591 PetscFunctionBegin; 2592 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2593 PetscCall(MatSetRandom(aij->A, rctx)); 2594 if (x->assembled) { 2595 PetscCall(MatSetRandom(aij->B, rctx)); 2596 } else { 2597 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2598 } 2599 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2600 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2601 PetscFunctionReturn(PETSC_SUCCESS); 2602 } 2603 2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2605 { 2606 PetscFunctionBegin; 2607 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2608 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2609 PetscFunctionReturn(PETSC_SUCCESS); 2610 } 2611 2612 /*@ 2613 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2614 2615 Not Collective 2616 2617 Input Parameter: 2618 . A - the matrix 2619 2620 Output Parameter: 2621 . 
nz - the number of nonzeros 2622 2623 Level: advanced 2624 2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2626 @*/ 2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2628 { 2629 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2630 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2631 PetscBool isaij; 2632 2633 PetscFunctionBegin; 2634 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2635 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2636 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2637 PetscFunctionReturn(PETSC_SUCCESS); 2638 } 2639 2640 /*@ 2641 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2642 2643 Collective 2644 2645 Input Parameters: 2646 + A - the matrix 2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2648 2649 Level: advanced 2650 2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2652 @*/ 2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2654 { 2655 PetscFunctionBegin; 2656 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2657 PetscFunctionReturn(PETSC_SUCCESS); 2658 } 2659 2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject) 2661 { 2662 PetscBool sc = PETSC_FALSE, flg; 2663 2664 PetscFunctionBegin; 2665 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2666 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2667 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2668 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2669 PetscOptionsHeadEnd(); 2670 PetscFunctionReturn(PETSC_SUCCESS); 2671 } 2672 2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2674 { 2675 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2676 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2677 2678 PetscFunctionBegin; 2679 if (!Y->preallocated) { 2680 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2681 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2682 PetscInt nonew = aij->nonew; 2683 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2684 aij->nonew = nonew; 2685 } 2686 PetscCall(MatShift_Basic(Y, a)); 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2691 { 2692 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2693 2694 PetscFunctionBegin; 2695 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2696 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2697 if (d) { 2698 PetscInt rstart; 2699 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2700 *d += rstart; 2701 } 2702 PetscFunctionReturn(PETSC_SUCCESS); 2703 } 2704 2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2708 2709 PetscFunctionBegin; 2710 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2715 { 2716 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2717 2718 PetscFunctionBegin; 2719 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2720 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2725 MatGetRow_MPIAIJ, 2726 MatRestoreRow_MPIAIJ, 2727 MatMult_MPIAIJ, 2728 /* 4*/ MatMultAdd_MPIAIJ, 2729 MatMultTranspose_MPIAIJ, 2730 MatMultTransposeAdd_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*10*/ NULL, 2735 NULL, 2736 NULL, 2737 MatSOR_MPIAIJ, 2738 MatTranspose_MPIAIJ, 2739 /*15*/ MatGetInfo_MPIAIJ, 2740 MatEqual_MPIAIJ, 2741 MatGetDiagonal_MPIAIJ, 2742 MatDiagonalScale_MPIAIJ, 2743 MatNorm_MPIAIJ, 2744 /*20*/ MatAssemblyBegin_MPIAIJ, 2745 MatAssemblyEnd_MPIAIJ, 2746 MatSetOption_MPIAIJ, 2747 MatZeroEntries_MPIAIJ, 2748 /*24*/ MatZeroRows_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 NULL, 2753 /*29*/ MatSetUp_MPI_Hash, 2754 NULL, 2755 NULL, 2756 MatGetDiagonalBlock_MPIAIJ, 2757 NULL, 2758 /*34*/ MatDuplicate_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 /*39*/ MatAXPY_MPIAIJ, 2764 MatCreateSubMatrices_MPIAIJ, 2765 MatIncreaseOverlap_MPIAIJ, 2766 MatGetValues_MPIAIJ, 2767 MatCopy_MPIAIJ, 2768 /*44*/ MatGetRowMax_MPIAIJ, 2769 MatScale_MPIAIJ, 2770 MatShift_MPIAIJ, 2771 MatDiagonalSet_MPIAIJ, 2772 MatZeroRowsColumns_MPIAIJ, 2773 /*49*/ MatSetRandom_MPIAIJ, 2774 MatGetRowIJ_MPIAIJ, 2775 MatRestoreRowIJ_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*54*/ MatFDColoringCreate_MPIXAIJ, 2779 NULL, 2780 MatSetUnfactored_MPIAIJ, 2781 MatPermute_MPIAIJ, 2782 NULL, 2783 /*59*/ MatCreateSubMatrix_MPIAIJ, 2784 MatDestroy_MPIAIJ, 2785 MatView_MPIAIJ, 2786 NULL, 2787 NULL, 2788 /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2789 NULL, 2790 NULL, 2791 NULL, 2792 MatGetRowMaxAbs_MPIAIJ, 2793 /*69*/ MatGetRowMinAbs_MPIAIJ, 2794 NULL, 2795 NULL, 2796 MatFDColoringApply_AIJ, 2797 MatSetFromOptions_MPIAIJ, 2798 MatFindZeroDiagonals_MPIAIJ, 2799 /*75*/ NULL, 2800 NULL, 2801 NULL, 2802 MatLoad_MPIAIJ, 2803 NULL, 2804 /*80*/ NULL, 2805 NULL, 2806 NULL, 2807 /*83*/ NULL, 2808 NULL, 2809 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2810 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2811 NULL, 2812 NULL, 2813 /*89*/ MatBindToCPU_MPIAIJ, 2814 MatProductSetFromOptions_MPIAIJ, 2815 NULL, 2816 NULL, 2817 
MatConjugate_MPIAIJ, 2818 /*94*/ NULL, 2819 MatSetValuesRow_MPIAIJ, 2820 MatRealPart_MPIAIJ, 2821 MatImaginaryPart_MPIAIJ, 2822 NULL, 2823 /*99*/ NULL, 2824 NULL, 2825 NULL, 2826 MatGetRowMin_MPIAIJ, 2827 NULL, 2828 /*104*/ MatMissingDiagonal_MPIAIJ, 2829 MatGetSeqNonzeroStructure_MPIAIJ, 2830 NULL, 2831 MatGetGhosts_MPIAIJ, 2832 NULL, 2833 /*109*/ NULL, 2834 MatMultDiagonalBlock_MPIAIJ, 2835 NULL, 2836 NULL, 2837 NULL, 2838 /*114*/ MatGetMultiProcBlock_MPIAIJ, 2839 MatFindNonzeroRows_MPIAIJ, 2840 MatGetColumnReductions_MPIAIJ, 2841 MatInvertBlockDiagonal_MPIAIJ, 2842 MatInvertVariableBlockDiagonal_MPIAIJ, 2843 /*119*/ MatCreateSubMatricesMPI_MPIAIJ, 2844 NULL, 2845 NULL, 2846 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2847 NULL, 2848 /*124*/ NULL, 2849 NULL, 2850 NULL, 2851 MatSetBlockSizes_MPIAIJ, 2852 NULL, 2853 /*129*/ MatFDColoringSetUp_MPIXAIJ, 2854 MatFindOffBlockDiagonalEntries_MPIAIJ, 2855 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2856 NULL, 2857 NULL, 2858 /*134*/ NULL, 2859 MatCreateGraph_Simple_AIJ, 2860 NULL, 2861 MatEliminateZeros_MPIAIJ, 2862 MatGetRowSumAbs_MPIAIJ, 2863 /*139*/ NULL, 2864 NULL, 2865 NULL, 2866 MatCopyHashToXAIJ_MPI_Hash, 2867 MatGetCurrentMemType_MPIAIJ}; 2868 2869 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2870 { 2871 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2872 2873 PetscFunctionBegin; 2874 PetscCall(MatStoreValues(aij->A)); 2875 PetscCall(MatStoreValues(aij->B)); 2876 PetscFunctionReturn(PETSC_SUCCESS); 2877 } 2878 2879 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2880 { 2881 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2882 2883 PetscFunctionBegin; 2884 PetscCall(MatRetrieveValues(aij->A)); 2885 PetscCall(MatRetrieveValues(aij->B)); 2886 PetscFunctionReturn(PETSC_SUCCESS); 2887 } 2888 2889 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2890 { 2891 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2892 PetscMPIInt size; 2893 2894 PetscFunctionBegin; 2895 if (B->hash_active) { 2896 B->ops[0] = b->cops; 2897 B->hash_active = PETSC_FALSE; 2898 } 2899 PetscCall(PetscLayoutSetUp(B->rmap)); 2900 PetscCall(PetscLayoutSetUp(B->cmap)); 2901 2902 #if defined(PETSC_USE_CTABLE) 2903 PetscCall(PetscHMapIDestroy(&b->colmap)); 2904 #else 2905 PetscCall(PetscFree(b->colmap)); 2906 #endif 2907 PetscCall(PetscFree(b->garray)); 2908 PetscCall(VecDestroy(&b->lvec)); 2909 PetscCall(VecScatterDestroy(&b->Mvctx)); 2910 2911 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2912 2913 MatSeqXAIJGetOptions_Private(b->B); 2914 PetscCall(MatDestroy(&b->B)); 2915 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2916 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2917 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2918 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2919 MatSeqXAIJRestoreOptions_Private(b->B); 2920 2921 MatSeqXAIJGetOptions_Private(b->A); 2922 PetscCall(MatDestroy(&b->A)); 2923 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2924 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2925 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2926 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2927 MatSeqXAIJRestoreOptions_Private(b->A); 2928 2929 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2930 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2931 B->preallocated = PETSC_TRUE; 2932 B->was_assembled = PETSC_FALSE; 2933 B->assembled = PETSC_FALSE; 2934 PetscFunctionReturn(PETSC_SUCCESS); 2935 } 2936 2937 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2938 { 2939 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2940 PetscBool ondiagreset, offdiagreset, memoryreset; 2941 2942 PetscFunctionBegin; 2943 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2944 PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()"); 2945 if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS); 2946 2947 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset)); 2948 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset)); 2949 memoryreset = (PetscBool)(ondiagreset || offdiagreset); 2950 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B))); 2951 if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS); 2952 2953 PetscCall(PetscLayoutSetUp(B->rmap)); 2954 PetscCall(PetscLayoutSetUp(B->cmap)); 2955 PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled"); 2956 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2957 PetscCall(VecScatterDestroy(&b->Mvctx)); 2958 2959 B->preallocated = PETSC_TRUE; 2960 B->was_assembled = PETSC_FALSE; 2961 B->assembled = PETSC_FALSE; 2962 /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */ 2963 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2964 PetscFunctionReturn(PETSC_SUCCESS); 2965 } 2966 2967 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2968 { 2969 Mat mat; 2970 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2971 2972 PetscFunctionBegin; 2973 *newmat = NULL; 2974 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2975 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2976 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2977 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2978 a = (Mat_MPIAIJ *)mat->data; 2979 2980 mat->factortype = matin->factortype; 2981 mat->assembled = matin->assembled; 2982 mat->insertmode = NOT_SET_VALUES; 2983 2984 a->size = oldmat->size; 2985 a->rank = oldmat->rank; 2986 a->donotstash = oldmat->donotstash; 2987 a->roworiented = oldmat->roworiented; 2988 a->rowindices = NULL; 2989 a->rowvalues = NULL; 2990 a->getrowactive = PETSC_FALSE; 2991 2992 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2993 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2994 if (matin->hash_active) { 2995 PetscCall(MatSetUp(mat)); 2996 } else { 2997 
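/* preallocated (non-hash) path: copy the column map and ghost column list, then duplicate the sequential blocks */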
mat->preallocated = matin->preallocated; 2998 if (oldmat->colmap) { 2999 #if defined(PETSC_USE_CTABLE) 3000 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3001 #else 3002 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3003 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3004 #endif 3005 } else a->colmap = NULL; 3006 if (oldmat->garray) { 3007 PetscInt len; 3008 len = oldmat->B->cmap->n; 3009 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3010 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3011 } else a->garray = NULL; 3012 3013 /* It may happen MatDuplicate is called with a non-assembled matrix 3014 In fact, MatDuplicate only requires the matrix to be preallocated 3015 This may happen inside a DMCreateMatrix_Shell */ 3016 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3017 if (oldmat->Mvctx) { 3018 a->Mvctx = oldmat->Mvctx; 3019 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3020 } 3021 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3022 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3023 } 3024 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3025 *newmat = mat; 3026 PetscFunctionReturn(PETSC_SUCCESS); 3027 } 3028 3029 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3030 { 3031 PetscBool isbinary, ishdf5; 3032 3033 PetscFunctionBegin; 3034 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3035 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3036 /* force binary viewer to load .info file if it has not yet done so */ 3037 PetscCall(PetscViewerSetUp(viewer)); 3038 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3039 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3040 if (isbinary) { 3041 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3042 } else if (ishdf5) { 3043 #if defined(PETSC_HAVE_HDF5) 3044 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3045 #else 3046 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3047 #endif 3048 } else { 3049 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3050 } 3051 PetscFunctionReturn(PETSC_SUCCESS); 3052 } 3053 3054 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3055 { 3056 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3057 PetscInt *rowidxs, *colidxs; 3058 PetscScalar *matvals; 3059 3060 PetscFunctionBegin; 3061 PetscCall(PetscViewerSetUp(viewer)); 3062 3063 /* read in matrix header */ 3064 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3065 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3066 M = header[1]; 3067 N = header[2]; 3068 nz = header[3]; 3069 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3070 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3071 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3072 3073 /* set block sizes from the viewer's .info file */ 3074 
PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3075 /* set global sizes if not set already */ 3076 if (mat->rmap->N < 0) mat->rmap->N = M; 3077 if (mat->cmap->N < 0) mat->cmap->N = N; 3078 PetscCall(PetscLayoutSetUp(mat->rmap)); 3079 PetscCall(PetscLayoutSetUp(mat->cmap)); 3080 3081 /* check if the matrix sizes are correct */ 3082 PetscCall(MatGetSize(mat, &rows, &cols)); 3083 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3084 3085 /* read in row lengths and build row indices */ 3086 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3087 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3088 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3089 rowidxs[0] = 0; 3090 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3091 if (nz != PETSC_INT_MAX) { 3092 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3093 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3094 } 3095 3096 /* read in column indices and matrix values */ 3097 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3098 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3099 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3100 /* store matrix indices and values */ 3101 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3102 PetscCall(PetscFree(rowidxs)); 3103 PetscCall(PetscFree2(colidxs, matvals)); 3104 PetscFunctionReturn(PETSC_SUCCESS); 3105 } 3106 3107 /* Not scalable because of ISAllGather() unless getting all columns. */ 3108 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3109 { 3110 IS iscol_local; 3111 PetscBool isstride; 3112 PetscMPIInt gisstride = 0; 3113 3114 PetscFunctionBegin; 3115 /* check if we are grabbing all columns*/ 3116 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3117 3118 if (isstride) { 3119 PetscInt start, len, mstart, mlen; 3120 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3121 PetscCall(ISGetLocalSize(iscol, &len)); 3122 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3123 if (mstart == start && mlen - mstart == len) gisstride = 1; 3124 } 3125 3126 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3127 if (gisstride) { 3128 PetscInt N; 3129 PetscCall(MatGetSize(mat, NULL, &N)); 3130 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3131 PetscCall(ISSetIdentity(iscol_local)); 3132 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3133 } else { 3134 PetscInt cbs; 3135 PetscCall(ISGetBlockSize(iscol, &cbs)); 3136 PetscCall(ISAllGather(iscol, &iscol_local)); 3137 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3138 } 3139 3140 *isseq = iscol_local; 3141 PetscFunctionReturn(PETSC_SUCCESS); 3142 } 3143 3144 /* 3145 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3146 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3147 3148 Input Parameters: 3149 + mat - matrix 3150 . 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3151 i.e., mat->rstart <= isrow[i] < mat->rend 3152 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3153 i.e., mat->cstart <= iscol[i] < mat->cend 3154 3155 Output Parameters: 3156 + isrow_d - sequential row index set for retrieving mat->A 3157 . iscol_d - sequential column index set for retrieving mat->A 3158 . iscol_o - sequential column index set for retrieving mat->B 3159 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3160 */ 3161 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3162 { 3163 Vec x, cmap; 3164 const PetscInt *is_idx; 3165 PetscScalar *xarray, *cmaparray; 3166 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3167 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3168 Mat B = a->B; 3169 Vec lvec = a->lvec, lcmap; 3170 PetscInt i, cstart, cend, Bn = B->cmap->N; 3171 MPI_Comm comm; 3172 VecScatter Mvctx = a->Mvctx; 3173 3174 PetscFunctionBegin; 3175 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3176 PetscCall(ISGetLocalSize(iscol, &ncols)); 3177 3178 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3179 PetscCall(MatCreateVecs(mat, &x, NULL)); 3180 PetscCall(VecSet(x, -1.0)); 3181 PetscCall(VecDuplicate(x, &cmap)); 3182 PetscCall(VecSet(cmap, -1.0)); 3183 3184 /* Get start indices */ 3185 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3186 isstart -= ncols; 3187 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3188 3189 PetscCall(ISGetIndices(iscol, &is_idx)); 3190 PetscCall(VecGetArray(x, &xarray)); 3191 PetscCall(VecGetArray(cmap, &cmaparray)); 3192 PetscCall(PetscMalloc1(ncols, &idx)); 3193 for (i = 0; i < ncols; i++) { 3194 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3195 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3196 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3197 } 3198 PetscCall(VecRestoreArray(x, &xarray)); 3199 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3200 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3201 3202 /* Get iscol_d */ 3203 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3204 PetscCall(ISGetBlockSize(iscol, &i)); 3205 PetscCall(ISSetBlockSize(*iscol_d, i)); 3206 3207 /* Get isrow_d */ 3208 PetscCall(ISGetLocalSize(isrow, &m)); 3209 rstart = mat->rmap->rstart; 3210 PetscCall(PetscMalloc1(m, &idx)); 3211 PetscCall(ISGetIndices(isrow, &is_idx)); 3212 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3213 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3214 3215 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3216 PetscCall(ISGetBlockSize(isrow, &i)); 3217 PetscCall(ISSetBlockSize(*isrow_d, i)); 3218 3219 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3220 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3221 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3222 3223 PetscCall(VecDuplicate(lvec, &lcmap)); 3224 3225 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3226 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3227 3228 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3229 /* off-process column 
indices */ 3230 count = 0; 3231 PetscCall(PetscMalloc1(Bn, &idx)); 3232 PetscCall(PetscMalloc1(Bn, &cmap1)); 3233 3234 PetscCall(VecGetArray(lvec, &xarray)); 3235 PetscCall(VecGetArray(lcmap, &cmaparray)); 3236 for (i = 0; i < Bn; i++) { 3237 if (PetscRealPart(xarray[i]) > -1.0) { 3238 idx[count] = i; /* local column index in off-diagonal part B */ 3239 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3240 count++; 3241 } 3242 } 3243 PetscCall(VecRestoreArray(lvec, &xarray)); 3244 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3245 3246 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3247 /* cannot ensure iscol_o has same blocksize as iscol! */ 3248 3249 PetscCall(PetscFree(idx)); 3250 *garray = cmap1; 3251 3252 PetscCall(VecDestroy(&x)); 3253 PetscCall(VecDestroy(&cmap)); 3254 PetscCall(VecDestroy(&lcmap)); 3255 PetscFunctionReturn(PETSC_SUCCESS); 3256 } 3257 3258 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3259 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3260 { 3261 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3262 Mat M = NULL; 3263 MPI_Comm comm; 3264 IS iscol_d, isrow_d, iscol_o; 3265 Mat Asub = NULL, Bsub = NULL; 3266 PetscInt n, count, M_size, N_size; 3267 3268 PetscFunctionBegin; 3269 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3270 3271 if (call == MAT_REUSE_MATRIX) { 3272 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3273 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3274 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3275 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3277 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3278 3279 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3280 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3281 3282 /* Update diagonal and off-diagonal portions of submat */ 3283 asub = (Mat_MPIAIJ *)(*submat)->data; 3284 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3285 PetscCall(ISGetLocalSize(iscol_o, &n)); 3286 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3287 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3288 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3289 3290 } else { /* call == MAT_INITIAL_MATRIX) */ 3291 PetscInt *garray, *garray_compact; 3292 PetscInt BsubN; 3293 3294 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
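       note: the garray produced by ISGetSeqIS_SameColDist_Private() is allocated with length Bn and only its
       leading entries are meaningful, so it is compacted below before being passed to MatCreateMPIAIJWithSeqAIJ()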
*/ 3295 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3296 3297 /* Create local submatrices Asub and Bsub */ 3298 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3299 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3300 3301 // Compact garray so its not of size Bn 3302 PetscCall(ISGetSize(iscol_o, &count)); 3303 PetscCall(PetscMalloc1(count, &garray_compact)); 3304 PetscCall(PetscArraycpy(garray_compact, garray, count)); 3305 3306 /* Create submatrix M */ 3307 PetscCall(ISGetSize(isrow, &M_size)); 3308 PetscCall(ISGetSize(iscol, &N_size)); 3309 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M)); 3310 3311 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3312 asub = (Mat_MPIAIJ *)M->data; 3313 3314 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3315 n = asub->B->cmap->N; 3316 if (BsubN > n) { 3317 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3318 const PetscInt *idx; 3319 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3320 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3321 3322 PetscCall(PetscMalloc1(n, &idx_new)); 3323 j = 0; 3324 PetscCall(ISGetIndices(iscol_o, &idx)); 3325 for (i = 0; i < n; i++) { 3326 if (j >= BsubN) break; 3327 while (subgarray[i] > garray[j]) j++; 3328 3329 if (subgarray[i] == garray[j]) { 3330 idx_new[i] = idx[j++]; 3331 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3332 } 3333 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3334 3335 PetscCall(ISDestroy(&iscol_o)); 3336 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3337 3338 } else if (BsubN < n) { 3339 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3340 } 3341 3342 PetscCall(PetscFree(garray)); 3343 *submat = M; 3344 3345 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3346 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3347 PetscCall(ISDestroy(&isrow_d)); 3348 3349 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3350 PetscCall(ISDestroy(&iscol_d)); 3351 3352 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3353 PetscCall(ISDestroy(&iscol_o)); 3354 } 3355 PetscFunctionReturn(PETSC_SUCCESS); 3356 } 3357 3358 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3359 { 3360 IS iscol_local = NULL, isrow_d; 3361 PetscInt csize; 3362 PetscInt n, i, j, start, end; 3363 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3364 MPI_Comm comm; 3365 3366 PetscFunctionBegin; 3367 /* If isrow has same processor distribution as mat, 3368 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3369 if (call == MAT_REUSE_MATRIX) { 3370 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3371 if (isrow_d) { 3372 sameRowDist = PETSC_TRUE; 3373 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3374 } else { 3375 
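      /* isrow_d was not cached on *newmat, so the initial call went through the SameRowDist (different column
         distribution) path; look for the sub-column index set it cached instead */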
PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3376 if (iscol_local) { 3377 sameRowDist = PETSC_TRUE; 3378 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3379 } 3380 } 3381 } else { 3382 /* Check if isrow has same processor distribution as mat */ 3383 sameDist[0] = PETSC_FALSE; 3384 PetscCall(ISGetLocalSize(isrow, &n)); 3385 if (!n) { 3386 sameDist[0] = PETSC_TRUE; 3387 } else { 3388 PetscCall(ISGetMinMax(isrow, &i, &j)); 3389 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3390 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3391 } 3392 3393 /* Check if iscol has same processor distribution as mat */ 3394 sameDist[1] = PETSC_FALSE; 3395 PetscCall(ISGetLocalSize(iscol, &n)); 3396 if (!n) { 3397 sameDist[1] = PETSC_TRUE; 3398 } else { 3399 PetscCall(ISGetMinMax(iscol, &i, &j)); 3400 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3401 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3402 } 3403 3404 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3405 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3406 sameRowDist = tsameDist[0]; 3407 } 3408 3409 if (sameRowDist) { 3410 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3411 /* isrow and iscol have same processor distribution as mat */ 3412 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3413 PetscFunctionReturn(PETSC_SUCCESS); 3414 } else { /* sameRowDist */ 3415 /* isrow has same processor distribution as mat */ 3416 if (call == MAT_INITIAL_MATRIX) { 3417 PetscBool sorted; 3418 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3419 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3420 PetscCall(ISGetSize(iscol, &i)); 3421 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3422 3423 PetscCall(ISSorted(iscol_local, &sorted)); 3424 if (sorted) { 3425 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3426 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3427 PetscFunctionReturn(PETSC_SUCCESS); 3428 } 3429 } else { /* call == MAT_REUSE_MATRIX */ 3430 IS iscol_sub; 3431 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3432 if (iscol_sub) { 3433 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3434 PetscFunctionReturn(PETSC_SUCCESS); 3435 } 3436 } 3437 } 3438 } 3439 3440 /* General case: iscol -> iscol_local which has global size of iscol */ 3441 if (call == MAT_REUSE_MATRIX) { 3442 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3443 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3444 } else { 3445 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3446 } 3447 3448 PetscCall(ISGetLocalSize(iscol, &csize)); 3449 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3450 3451 if (call == MAT_INITIAL_MATRIX) { 3452 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3453 PetscCall(ISDestroy(&iscol_local)); 3454 } 3455 PetscFunctionReturn(PETSC_SUCCESS); 3456 } 3457 3458 /*@C 3459 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using 
`MATSEQAIJ` matrices that contain the "diagonal" 3460 and "off-diagonal" part of the matrix in CSR format. 3461 3462 Collective 3463 3464 Input Parameters: 3465 + comm - MPI communicator 3466 . M - the global row size 3467 . N - the global column size 3468 . A - "diagonal" portion of matrix 3469 . B - if garray is `NULL`, B should be the off-diagonal matrix using global column ids and have N columns; if garray is not `NULL`, B should be the off-diagonal matrix using local column ids and have as many columns as entries in garray 3470 - garray - either `NULL` or the global indices of `B`'s columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter. 3471 3472 Output Parameter: 3473 . mat - the matrix, with input `A` as its local diagonal matrix 3474 3475 Level: advanced 3476 3477 Notes: 3478 See `MatCreateAIJ()` for the definition of the "diagonal" and "off-diagonal" portions of the matrix. 3479 3480 `A` and `B` become part of the output mat. The user cannot use `A` and `B` anymore. 3481 3482 If `garray` is `NULL`, `B` will be compacted to use local indices. In this sense, `B`'s sparsity pattern (nonzerostate) will be changed. If `B` is a device matrix, we need to somehow also update 3483 `B`'s copy on device. We do so by increasing `B`'s nonzerostate. When `B` is used on device, device matrix types should detect this change (see the internal routines `MatSeqAIJCUSPARSECopyToGPU()` or 3484 `MatAssemblyEnd_SeqAIJKokkos()`) and will simply destroy and then recreate the device copy of `B`. This is not optimal, but it is easy to implement and less hacky. To avoid this overhead, try to compute `garray` 3485 yourself; see the algorithms in the private function `MatSetUpMultiply_MPIAIJ()`. 3486 3487 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3488 @*/ 3489 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat) 3490 { 3491 PetscInt m, n; 3492 MatType mpi_mat_type; 3493 Mat_MPIAIJ *mpiaij; 3494 Mat C; 3495 3496 PetscFunctionBegin; 3497 PetscCall(MatCreate(comm, &C)); 3498 PetscCall(MatGetSize(A, &m, &n)); 3499 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3500 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3501 3502 PetscCall(MatSetSizes(C, m, n, M, N)); 3503 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3504 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3505 PetscCall(MatSetType(C, mpi_mat_type)); 3506 if (!garray) { 3507 const PetscScalar *ba; 3508 3509 B->nonzerostate++; 3510 PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */ 3511 PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 3512 } 3513 3514 PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs)); 3515 PetscCall(PetscLayoutSetUp(C->rmap)); 3516 PetscCall(PetscLayoutSetUp(C->cmap)); 3517 3518 mpiaij = (Mat_MPIAIJ *)C->data; 3519 mpiaij->A = A; 3520 mpiaij->B = B; 3521 mpiaij->garray = garray; 3522 C->preallocated = PETSC_TRUE; 3523 C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. 
In effect, making MatAssemblyBegin a nop */ 3524 3525 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3526 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3527 /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and 3528 also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced 3529 */ 3530 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3531 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3532 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3533 *mat = C; 3534 PetscFunctionReturn(PETSC_SUCCESS); 3535 } 3536 3537 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3538 3539 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3540 { 3541 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3542 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3543 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3544 Mat M, Msub, B = a->B; 3545 MatScalar *aa; 3546 Mat_SeqAIJ *aij; 3547 PetscInt *garray = a->garray, *colsub, Ncols; 3548 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3549 IS iscol_sub, iscmap; 3550 const PetscInt *is_idx, *cmap; 3551 PetscBool allcolumns = PETSC_FALSE; 3552 MPI_Comm comm; 3553 3554 PetscFunctionBegin; 3555 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3556 if (call == MAT_REUSE_MATRIX) { 3557 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3558 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3559 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3560 3561 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3562 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3563 3564 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3565 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3566 3567 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3568 3569 } else { /* call == MAT_INITIAL_MATRIX) */ 3570 PetscBool flg; 3571 3572 PetscCall(ISGetLocalSize(iscol, &n)); 3573 PetscCall(ISGetSize(iscol, &Ncols)); 3574 3575 /* (1) iscol -> nonscalable iscol_local */ 3576 /* Check for special case: each processor gets entire matrix columns */ 3577 PetscCall(ISIdentity(iscol_local, &flg)); 3578 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3579 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3580 if (allcolumns) { 3581 iscol_sub = iscol_local; 3582 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3583 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3584 3585 } else { 3586 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3587 PetscInt *idx, *cmap1, k; 3588 PetscCall(PetscMalloc1(Ncols, &idx)); 3589 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3590 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3591 count = 0; 3592 k = 0; 3593 for (i = 0; i < Ncols; i++) { 3594 j = is_idx[i]; 3595 if (j >= cstart && j < cend) { 3596 /* diagonal part of mat */ 3597 idx[count] = j; 3598 cmap1[count++] = i; /* column index in submat */ 3599 } else if (Bn) { 3600 /* off-diagonal part of mat */ 3601 if (j == garray[k]) { 3602 idx[count] = j; 3603 cmap1[count++] = i; /* column index in submat */ 3604 } else if (j > garray[k]) { 3605 while (j > garray[k] && k < Bn - 1) k++; 3606 if (j == garray[k]) { 3607 idx[count] = j; 3608 cmap1[count++] = i; /* column index in submat */ 3609 } 3610 } 3611 } 3612 } 3613 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3614 3615 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3616 PetscCall(ISGetBlockSize(iscol, &cbs)); 3617 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3618 3619 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3620 } 3621 3622 /* (3) Create sequential Msub */ 3623 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3624 } 3625 3626 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3627 aij = (Mat_SeqAIJ *)Msub->data; 3628 ii = aij->i; 3629 PetscCall(ISGetIndices(iscmap, &cmap)); 3630 3631 /* 3632 m - number of local rows 3633 Ncols - number of columns (same on all processors) 3634 rstart - first row in new global matrix generated 3635 */ 3636 PetscCall(MatGetSize(Msub, &m, NULL)); 3637 3638 if (call == MAT_INITIAL_MATRIX) { 3639 /* (4) Create parallel newmat */ 3640 PetscMPIInt rank, size; 3641 PetscInt csize; 3642 3643 PetscCallMPI(MPI_Comm_size(comm, &size)); 3644 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3645 3646 /* 3647 Determine the number of non-zeros in the diagonal and off-diagonal 3648 portions of the matrix in order to do correct preallocation 3649 */ 3650 3651 /* first get start and end of "diagonal" columns */ 3652 PetscCall(ISGetLocalSize(iscol, &csize)); 3653 if (csize == PETSC_DECIDE) { 3654 PetscCall(ISGetSize(isrow, &mglobal)); 3655 if (mglobal == Ncols) { /* square matrix */ 3656 nlocal = m; 3657 } else { 3658 nlocal = Ncols / size + ((Ncols % size) > rank); 3659 } 3660 } else { 3661 nlocal = csize; 3662 } 3663 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3664 rstart = rend - nlocal; 3665 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3666 3667 /* next, compute all the lengths */ 3668 jj = aij->j; 3669 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3670 olens = dlens + m; 3671 for (i = 0; i < m; i++) { 3672 jend = ii[i + 1] - ii[i]; 3673 olen = 0; 3674 dlen = 0; 3675 for (j = 0; j < jend; j++) { 3676 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3677 else dlen++; 3678 jj++; 3679 } 3680 olens[i] = olen; 3681 dlens[i] = dlen; 3682 } 3683 3684 PetscCall(ISGetBlockSize(isrow, &bs)); 3685 PetscCall(ISGetBlockSize(iscol, &cbs)); 3686 3687 PetscCall(MatCreate(comm, &M)); 3688 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3689 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3690 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3691 
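      /* preallocate the parallel submatrix with the exact per-row diagonal/off-diagonal counts computed above */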
PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3692 PetscCall(PetscFree(dlens)); 3693 3694 } else { /* call == MAT_REUSE_MATRIX */ 3695 M = *newmat; 3696 PetscCall(MatGetLocalSize(M, &i, NULL)); 3697 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3698 PetscCall(MatZeroEntries(M)); 3699 /* 3700 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3701 rather than the slower MatSetValues(). 3702 */ 3703 M->was_assembled = PETSC_TRUE; 3704 M->assembled = PETSC_FALSE; 3705 } 3706 3707 /* (5) Set values of Msub to *newmat */ 3708 PetscCall(PetscMalloc1(count, &colsub)); 3709 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3710 3711 jj = aij->j; 3712 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3713 for (i = 0; i < m; i++) { 3714 row = rstart + i; 3715 nz = ii[i + 1] - ii[i]; 3716 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3717 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3718 jj += nz; 3719 aa += nz; 3720 } 3721 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3722 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3723 3724 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3725 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3726 3727 PetscCall(PetscFree(colsub)); 3728 3729 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3730 if (call == MAT_INITIAL_MATRIX) { 3731 *newmat = M; 3732 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3733 PetscCall(MatDestroy(&Msub)); 3734 3735 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3736 PetscCall(ISDestroy(&iscol_sub)); 3737 3738 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3739 PetscCall(ISDestroy(&iscmap)); 3740 3741 if (iscol_local) { 3742 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3743 PetscCall(ISDestroy(&iscol_local)); 3744 } 3745 } 3746 PetscFunctionReturn(PETSC_SUCCESS); 3747 } 3748 3749 /* 3750 Not great since it makes two copies of the submatrix, first an SeqAIJ 3751 in local and then by concatenating the local matrices the end result. 3752 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3753 3754 This requires a sequential iscol with all indices. 
3755 */ 3756 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3757 { 3758 PetscMPIInt rank, size; 3759 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3760 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3761 Mat M, Mreuse; 3762 MatScalar *aa, *vwork; 3763 MPI_Comm comm; 3764 Mat_SeqAIJ *aij; 3765 PetscBool colflag, allcolumns = PETSC_FALSE; 3766 3767 PetscFunctionBegin; 3768 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3769 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3770 PetscCallMPI(MPI_Comm_size(comm, &size)); 3771 3772 /* Check for special case: each processor gets entire matrix columns */ 3773 PetscCall(ISIdentity(iscol, &colflag)); 3774 PetscCall(ISGetLocalSize(iscol, &n)); 3775 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3776 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3777 3778 if (call == MAT_REUSE_MATRIX) { 3779 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3780 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3781 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3782 } else { 3783 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3784 } 3785 3786 /* 3787 m - number of local rows 3788 n - number of columns (same on all processors) 3789 rstart - first row in new global matrix generated 3790 */ 3791 PetscCall(MatGetSize(Mreuse, &m, &n)); 3792 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3793 if (call == MAT_INITIAL_MATRIX) { 3794 aij = (Mat_SeqAIJ *)Mreuse->data; 3795 ii = aij->i; 3796 jj = aij->j; 3797 3798 /* 3799 Determine the number of non-zeros in the diagonal and off-diagonal 3800 portions of the matrix in order to do correct preallocation 3801 */ 3802 3803 /* first get start and end of "diagonal" columns */ 3804 if (csize == PETSC_DECIDE) { 3805 PetscCall(ISGetSize(isrow, &mglobal)); 3806 if (mglobal == n) { /* square matrix */ 3807 nlocal = m; 3808 } else { 3809 nlocal = n / size + ((n % size) > rank); 3810 } 3811 } else { 3812 nlocal = csize; 3813 } 3814 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3815 rstart = rend - nlocal; 3816 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3817 3818 /* next, compute all the lengths */ 3819 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3820 olens = dlens + m; 3821 for (i = 0; i < m; i++) { 3822 jend = ii[i + 1] - ii[i]; 3823 olen = 0; 3824 dlen = 0; 3825 for (j = 0; j < jend; j++) { 3826 if (*jj < rstart || *jj >= rend) olen++; 3827 else dlen++; 3828 jj++; 3829 } 3830 olens[i] = olen; 3831 dlens[i] = dlen; 3832 } 3833 PetscCall(MatCreate(comm, &M)); 3834 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3835 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3836 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3837 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3838 PetscCall(PetscFree(dlens)); 3839 } else { 3840 PetscInt ml, nl; 3841 3842 M = *newmat; 3843 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3844 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3845 PetscCall(MatZeroEntries(M)); 3846 /* 3847 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3848 rather than the slower MatSetValues(). 3849 */ 3850 M->was_assembled = PETSC_TRUE; 3851 M->assembled = PETSC_FALSE; 3852 } 3853 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3854 aij = (Mat_SeqAIJ *)Mreuse->data; 3855 ii = aij->i; 3856 jj = aij->j; 3857 3858 /* trigger copy to CPU if needed */ 3859 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3860 for (i = 0; i < m; i++) { 3861 row = rstart + i; 3862 nz = ii[i + 1] - ii[i]; 3863 cwork = jj; 3864 jj = PetscSafePointerPlusOffset(jj, nz); 3865 vwork = aa; 3866 aa = PetscSafePointerPlusOffset(aa, nz); 3867 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3868 } 3869 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3870 3871 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3872 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3873 *newmat = M; 3874 3875 /* save submatrix used in processor for next request */ 3876 if (call == MAT_INITIAL_MATRIX) { 3877 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3878 PetscCall(MatDestroy(&Mreuse)); 3879 } 3880 PetscFunctionReturn(PETSC_SUCCESS); 3881 } 3882 3883 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3884 { 3885 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3886 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3887 const PetscInt *JJ; 3888 PetscBool nooffprocentries; 3889 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3890 3891 PetscFunctionBegin; 3892 PetscCall(PetscLayoutSetUp(B->rmap)); 3893 PetscCall(PetscLayoutSetUp(B->cmap)); 3894 m = B->rmap->n; 3895 cstart = B->cmap->rstart; 3896 cend = B->cmap->rend; 3897 rstart = B->rmap->rstart; 3898 irstart = Ii[0]; 3899 3900 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3901 3902 if (PetscDefined(USE_DEBUG)) { 3903 for (i = 0; i < m; i++) { 3904 nnz = Ii[i + 1] - Ii[i]; 3905 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3906 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3907 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3908 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3909 } 3910 } 3911 3912 for (i = 0; i < m; i++) { 3913 nnz = Ii[i + 1] - Ii[i]; 3914 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3915 nnz_max = PetscMax(nnz_max, nnz); 3916 d = 0; 3917 for (j = 0; j < nnz; j++) { 3918 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3919 } 3920 d_nnz[i] = d; 3921 o_nnz[i] = nnz - d; 3922 } 3923 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3924 PetscCall(PetscFree2(d_nnz, o_nnz)); 3925 3926 for (i = 0; i < m; i++) { 3927 ii = i + rstart; 3928 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3929 } 3930 nooffprocentries = B->nooffprocentries; 3931 B->nooffprocentries = PETSC_TRUE; 3932 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3933 
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3934 B->nooffprocentries = nooffprocentries; 3935 3936 /* count number of entries below block diagonal */ 3937 PetscCall(PetscFree(Aij->ld)); 3938 PetscCall(PetscCalloc1(m, &ld)); 3939 Aij->ld = ld; 3940 for (i = 0; i < m; i++) { 3941 nnz = Ii[i + 1] - Ii[i]; 3942 j = 0; 3943 while (j < nnz && J[j] < cstart) j++; 3944 ld[i] = j; 3945 if (J) J += nnz; 3946 } 3947 3948 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3949 PetscFunctionReturn(PETSC_SUCCESS); 3950 } 3951 3952 /*@ 3953 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3954 (the default parallel PETSc format). 3955 3956 Collective 3957 3958 Input Parameters: 3959 + B - the matrix 3960 . i - the indices into `j` for the start of each local row (indices start with zero) 3961 . j - the column indices for each local row (indices start with zero) 3962 - v - optional values in the matrix 3963 3964 Level: developer 3965 3966 Notes: 3967 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3968 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3969 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3970 3971 The `i` and `j` indices are 0-based, and the `i` indices are offsets into the local `j` array. 3972 3973 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3974 3975 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3976 3977 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3978 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3979 3980 The format used for the sparse matrix input is equivalent to a 3981 row-major ordering, i.e., for the following matrix, the input data expected is 3982 as shown 3983 .vb 3984 1 0 0 3985 2 0 3 P0 3986 ------- 3987 4 5 6 P1 3988 3989 Process0 [P0] rows_owned=[0,1] 3990 i = {0,1,3} [size = nrow+1 = 2+1] 3991 j = {0,0,2} [size = 3] 3992 v = {1,2,3} [size = 3] 3993 3994 Process1 [P1] rows_owned=[2] 3995 i = {0,3} [size = nrow+1 = 1+1] 3996 j = {0,1,2} [size = 3] 3997 v = {4,5,6} [size = 3] 3998 .ve 3999 4000 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4001 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4002 @*/ 4003 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4004 { 4005 PetscFunctionBegin; 4006 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4007 PetscFunctionReturn(PETSC_SUCCESS); 4008 } 4009 4010 /*@ 4011 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4012 (the default parallel PETSc format). For good matrix assembly performance 4013 the user should preallocate the matrix storage by setting the parameters 4014 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4015 4016 Collective 4017 4018 Input Parameters: 4019 + B - the matrix 4020 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4021 (same value is used for all local rows) 4022 . d_nnz - array containing the number of nonzeros in the various rows of the 4023 DIAGONAL portion of the local submatrix (possibly different for each row) 4024 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4025 The size of this array is equal to the number of local rows, i.e., 'm'. 4026 For matrices that will be factored, you must leave room for (and set) 4027 the diagonal entry even if it is zero. 4028 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4029 submatrix (same value is used for all local rows). 4030 - o_nnz - array containing the number of nonzeros in the various rows of the 4031 OFF-DIAGONAL portion of the local submatrix (possibly different for 4032 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4033 structure. The size of this array is equal to the number 4034 of local rows, i.e., 'm'. 4035 4036 Example Usage: 4037 Consider the following 8x8 matrix with 34 non-zero values, that is 4038 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4039 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4040 as follows 4041 4042 .vb 4043 1 2 0 | 0 3 0 | 0 4 4044 Proc0 0 5 6 | 7 0 0 | 8 0 4045 9 0 10 | 11 0 0 | 12 0 4046 ------------------------------------- 4047 13 0 14 | 15 16 17 | 0 0 4048 Proc1 0 18 0 | 19 20 21 | 0 0 4049 0 0 0 | 22 23 0 | 24 0 4050 ------------------------------------- 4051 Proc2 25 26 27 | 0 0 28 | 29 0 4052 30 0 0 | 31 32 33 | 0 34 4053 .ve 4054 4055 This can be represented as a collection of submatrices as 4056 .vb 4057 A B C 4058 D E F 4059 G H I 4060 .ve 4061 4062 Where the submatrices A,B,C are owned by proc0, D,E,F are 4063 owned by proc1, G,H,I are owned by proc2. 4064 4065 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4066 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4067 The 'M','N' parameters are 8,8, and have the same values on all procs. 4068 4069 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4070 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4071 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4072 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4073 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4074 matrix, and [DF] as another `MATSEQAIJ` matrix. 4075 4076 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4077 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4078 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4079 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4080 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4081 In this case, the values of `d_nz`, `o_nz` are 4082 .vb 4083 proc0 d_nz = 2, o_nz = 2 4084 proc1 d_nz = 3, o_nz = 2 4085 proc2 d_nz = 1, o_nz = 4 4086 .ve 4087 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4088 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4089 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4090 34 values. 4091 4092 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4093 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices, as sketched below. 
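   A minimal sketch of such a call (the array contents below are placeholders, not the counts from this example):
.vb
     PetscInt d_nnz[3] = {2, 2, 1}, o_nnz[3] = {2, 1, 4}; /* one entry per local row */
     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); /* d_nz and o_nz are ignored when the arrays are given */
.ve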
4094 In the above case the values for `d_nnz`, `o_nnz` are 4095 .vb 4096 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4097 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4098 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4099 .ve 4100 Here the space allocated is the sum of all the above values, i.e., 34, and 4101 hence the pre-allocation is perfect. 4102 4103 Level: intermediate 4104 4105 Notes: 4106 If the *_nnz parameter is given then the *_nz parameter is ignored. 4107 4108 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4109 storage. The stored row and column indices begin with zero. 4110 See [Sparse Matrices](sec_matsparse) for details. 4111 4112 The parallel matrix is partitioned such that the first m0 rows belong to 4113 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4114 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 4115 4116 The DIAGONAL portion of the local submatrix of a processor can be defined 4117 as the submatrix which is obtained by extracting the part corresponding to 4118 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4119 first row that belongs to the processor, r2 is the last row belonging to 4120 this processor, and c1-c2 is the range of indices of the local part of a 4121 vector suitable for applying the matrix to. This is an m x n matrix. In the 4122 common case of a square matrix, the row and column ranges are the same and 4123 the DIAGONAL part is also square. The remaining portion of the local 4124 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 4125 4126 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4127 4128 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4129 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded. 4130 You can also run with the option `-info` and look for messages with the string 4131 malloc in them to see if additional memory allocation was needed. 4132 4133 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4134 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4135 @*/ 4136 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4137 { 4138 PetscFunctionBegin; 4139 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4140 PetscValidType(B, 1); 4141 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4142 PetscFunctionReturn(PETSC_SUCCESS); 4143 } 4144 4145 /*@ 4146 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4147 CSR format. 4148 4149 Collective 4150 4151 Input Parameters: 4152 + comm - MPI communicator 4153 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4154 . n - This value should be the same as the local size used in creating the 4155 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4156 calculated if `N` is given) For square matrices n is almost always `m`. 4157 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4158 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4159 . 
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4160 . j - global column indices 4161 - a - optional matrix values 4162 4163 Output Parameter: 4164 . mat - the matrix 4165 4166 Level: intermediate 4167 4168 Notes: 4169 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4170 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4171 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4172 4173 The `i` and `j` indices are 0-based, and the `i` indices are offsets into the local `j` array. 4174 4175 Once you have created the matrix, you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`. 4176 4177 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4178 `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted. 4179 4180 The format used for the sparse matrix input is equivalent to a 4181 row-major ordering, i.e., for the following matrix, the input data expected is 4182 as shown 4183 .vb 4184 1 0 0 4185 2 0 3 P0 4186 ------- 4187 4 5 6 P1 4188 4189 Process0 [P0] rows_owned=[0,1] 4190 i = {0,1,3} [size = nrow+1 = 2+1] 4191 j = {0,0,2} [size = 3] 4192 v = {1,2,3} [size = 3] 4193 4194 Process1 [P1] rows_owned=[2] 4195 i = {0,3} [size = nrow+1 = 1+1] 4196 j = {0,1,2} [size = 3] 4197 v = {4,5,6} [size = 3] 4198 .ve 4199 4200 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4201 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4202 @*/ 4203 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4204 { 4205 PetscFunctionBegin; 4206 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4207 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4208 PetscCall(MatCreate(comm, mat)); 4209 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4210 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4211 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4212 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4213 PetscFunctionReturn(PETSC_SUCCESS); 4214 } 4215 4216 /*@ 4217 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4218 CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed 4219 to `MatCreateMPIAIJWithArrays()`. 4220 4221 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4222 4223 Collective 4224 4225 Input Parameters: 4226 + mat - the matrix 4227 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4228 . n - This value should be the same as the local size used in creating the 4229 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4230 calculated if N is given) For square matrices n is almost always m. 4231 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4232 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4233 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4234 . J - column indices 4235 - v - matrix values 4236 4237 Level: deprecated 4238 4239 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4240 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4241 @*/ 4242 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4243 { 4244 PetscInt nnz, i; 4245 PetscBool nooffprocentries; 4246 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4247 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4248 PetscScalar *ad, *ao; 4249 PetscInt ldi, Iii, md; 4250 const PetscInt *Adi = Ad->i; 4251 PetscInt *ld = Aij->ld; 4252 4253 PetscFunctionBegin; 4254 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4255 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4256 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4257 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4258 4259 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4260 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4261 4262 for (i = 0; i < m; i++) { 4263 if (PetscDefined(USE_DEBUG)) { 4264 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4265 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4266 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4267 } 4268 } 4269 nnz = Ii[i + 1] - Ii[i]; 4270 Iii = Ii[i]; 4271 ldi = ld[i]; 4272 md = Adi[i + 1] - Adi[i]; 4273 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4274 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4275 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4276 ad += md; 4277 ao += nnz - md; 4278 } 4279 nooffprocentries = mat->nooffprocentries; 4280 mat->nooffprocentries = PETSC_TRUE; 4281 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4282 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4283 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4284 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4285 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4286 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4287 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4288 mat->nooffprocentries = nooffprocentries; 4289 PetscFunctionReturn(PETSC_SUCCESS); 4290 } 4291 4292 /*@ 4293 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4294 4295 Collective 4296 4297 Input Parameters: 4298 + mat - the matrix 4299 - v - matrix values, stored by row 4300 4301 Level: intermediate 4302 4303 Notes: 4304 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4305 4306 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4307 4308 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4309 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4310 @*/ 4311 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4312 { 4313 PetscInt nnz, i, m; 4314 PetscBool nooffprocentries; 4315 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4316 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4317 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4318 PetscScalar *ad, *ao; 4319 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4320 PetscInt ldi, Iii, md; 4321 PetscInt *ld = Aij->ld; 4322 4323 PetscFunctionBegin; 4324 m = mat->rmap->n; 4325 4326 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4327 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4328 Iii = 0; 4329 for (i = 0; i < m; i++) { 4330 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4331 ldi = ld[i]; 4332 md = Adi[i + 1] - Adi[i]; 4333 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4334 ad += md; 4335 if (ao) { 4336 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4337 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4338 ao += nnz - md; 4339 } 4340 Iii += nnz; 4341 } 4342 nooffprocentries = mat->nooffprocentries; 4343 mat->nooffprocentries = PETSC_TRUE; 4344 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4345 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4346 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4347 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4348 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4349 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4350 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4351 mat->nooffprocentries = nooffprocentries; 4352 PetscFunctionReturn(PETSC_SUCCESS); 4353 } 4354 4355 /*@ 4356 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4357 (the default parallel PETSc format). For good matrix assembly performance 4358 the user should preallocate the matrix storage by setting the parameters 4359 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4360 4361 Collective 4362 4363 Input Parameters: 4364 + comm - MPI communicator 4365 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4366 This value should be the same as the local size used in creating the 4367 y vector for the matrix-vector product y = Ax. 4368 . n - This value should be the same as the local size used in creating the 4369 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4370 calculated if N is given) For square matrices n is almost always m. 4371 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4372 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4373 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4374 (same value is used for all local rows) 4375 . d_nnz - array containing the number of nonzeros in the various rows of the 4376 DIAGONAL portion of the local submatrix (possibly different for each row) 4377 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4378 The size of this array is equal to the number of local rows, i.e 'm'. 4379 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4380 submatrix (same value is used for all local rows). 4381 - o_nnz - array containing the number of nonzeros in the various rows of the 4382 OFF-DIAGONAL portion of the local submatrix (possibly different for 4383 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4384 structure. The size of this array is equal to the number 4385 of local rows, i.e., 'm'. 4386 4387 Output Parameter: 4388 . A - the matrix 4389 4390 Options Database Keys: 4391 + -mat_no_inode - Do not use inodes 4392 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4393 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4394 See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4395 to be viewed as a matrix. Entry (i,j) is the size of the message (in bytes) rank i sends to rank j in one `MatMult()` call. 4396 4397 Level: intermediate 4398 4399 Notes: 4400 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4401 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4402 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4403 4404 If the *_nnz parameter is given then the *_nz parameter is ignored 4405 4406 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4407 processors, while the `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4408 storage requirements for this matrix. 4409 4410 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4411 processor then it must be used on all processors that share the object for 4412 that argument. 4413 4414 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4415 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4416 4417 The user MUST specify either the local or global matrix dimensions 4418 (possibly both). 4419 4420 The parallel matrix is partitioned across processors such that the 4421 first `m0` rows belong to process 0, the next `m1` rows belong to 4422 process 1, the next `m2` rows belong to process 2, etc., where 4423 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4424 values corresponding to an [m x N] submatrix. 4425 4426 The columns are logically partitioned with the n0 columns belonging 4427 to the 0th partition, the next n1 columns belonging to the next 4428 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4429 4430 The DIAGONAL portion of the local submatrix on any given processor 4431 is the submatrix corresponding to the rows and columns m,n 4432 corresponding to the given processor, i.e., the diagonal matrix on 4433 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4434 etc. The remaining portion of the local submatrix [m x (N-n)] 4435 constitutes the OFF-DIAGONAL portion. The example below better 4436 illustrates this concept. The two matrices, the DIAGONAL portion and 4437 the OFF-DIAGONAL portion, are each stored as `MATSEQAIJ` matrices.
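   As a quick illustrative sketch (the local sizes and per-row counts below are made up, and error checking with `PetscCall()` is omitted),
   a direct call on a communicator where every process owns 3 rows and 3 columns might look like
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2, 2, 2};  // nonzeros per local row in the DIAGONAL block
     PetscInt o_nnz[3] = {2, 1, 0};  // nonzeros per local row in the OFF-DIAGONAL block

     MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, PETSC_DETERMINE, PETSC_DETERMINE, 0, d_nnz, 0, o_nnz, &A);
     // ... fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
     MatDestroy(&A);
.ve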
4438 4439 For a square global matrix we define each processor's diagonal portion 4440 to be its local rows and the corresponding columns (a square submatrix); 4441 each processor's off-diagonal portion encompasses the remainder of the 4442 local matrix (a rectangular submatrix). 4443 4444 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4445 4446 When calling this routine with a single process communicator, a matrix of 4447 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4448 type of communicator, use the construction mechanism 4449 .vb 4450 MatCreate(..., &A); 4451 MatSetType(A, MATMPIAIJ); 4452 MatSetSizes(A, m, n, M, N); 4453 MatMPIAIJSetPreallocation(A, ...); 4454 .ve 4455 4456 By default, this format uses inodes (identical nodes) when possible. 4457 We search for consecutive rows with the same nonzero structure, thereby 4458 reusing matrix information to achieve increased efficiency. 4459 4460 Example Usage: 4461 Consider the following 8x8 matrix with 34 nonzero values, that is 4462 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4463 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4464 as follows 4465 4466 .vb 4467 1 2 0 | 0 3 0 | 0 4 4468 Proc0 0 5 6 | 7 0 0 | 8 0 4469 9 0 10 | 11 0 0 | 12 0 4470 ------------------------------------- 4471 13 0 14 | 15 16 17 | 0 0 4472 Proc1 0 18 0 | 19 20 21 | 0 0 4473 0 0 0 | 22 23 0 | 24 0 4474 ------------------------------------- 4475 Proc2 25 26 27 | 0 0 28 | 29 0 4476 30 0 0 | 31 32 33 | 0 34 4477 .ve 4478 4479 This can be represented as a collection of submatrices as 4480 4481 .vb 4482 A B C 4483 D E F 4484 G H I 4485 .ve 4486 4487 Here the submatrices A,B,C are owned by proc0, D,E,F are 4488 owned by proc1, and G,H,I are owned by proc2. 4489 4490 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4491 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4492 The 'M','N' parameters are 8,8, and have the same values on all procs. 4493 4494 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4495 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4496 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4497 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4498 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4499 matrix, and [DF] as another `MATSEQAIJ` matrix. 4500 4501 When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4502 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4503 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4504 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4505 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4506 In this case, the values of `d_nz`,`o_nz` are 4507 .vb 4508 proc0 d_nz = 2, o_nz = 2 4509 proc1 d_nz = 3, o_nz = 2 4510 proc2 d_nz = 1, o_nz = 4 4511 .ve 4512 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4513 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4514 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4515 34 values. 4516 4517 When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4518 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4519 In the above case, the values for d_nnz, o_nnz are 4520 .vb 4521 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4522 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4523 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4524 .ve 4525 Here the space allocated is the sum of all the above values, i.e., 34, and 4526 hence the preallocation is perfect. 4527 4528 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4529 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4530 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4531 @*/ 4532 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4533 { 4534 PetscMPIInt size; 4535 4536 PetscFunctionBegin; 4537 PetscCall(MatCreate(comm, A)); 4538 PetscCall(MatSetSizes(*A, m, n, M, N)); 4539 PetscCallMPI(MPI_Comm_size(comm, &size)); 4540 if (size > 1) { 4541 PetscCall(MatSetType(*A, MATMPIAIJ)); 4542 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4543 } else { 4544 PetscCall(MatSetType(*A, MATSEQAIJ)); 4545 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4546 } 4547 PetscFunctionReturn(PETSC_SUCCESS); 4548 } 4549 4550 /*@C 4551 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4552 4553 Not Collective 4554 4555 Input Parameter: 4556 . A - The `MATMPIAIJ` matrix 4557 4558 Output Parameters: 4559 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4560 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4561 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4562 4563 Level: intermediate 4564 4565 Note: 4566 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4567 in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is 4568 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4569 local column numbers to global column numbers in the original matrix.
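   For example, a minimal sketch (error checking with `PetscCall()` omitted) that walks the off-diagonal block and
   recovers the global column of each of its local columns is
.vb
     Mat             Ad, Ao;
     const PetscInt *colmap;
     PetscInt        nco;

     MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
     MatGetLocalSize(Ao, NULL, &nco);       // nco = number of nonzero off-diagonal columns on this process
     for (PetscInt c = 0; c < nco; c++) {
       PetscInt gcol = colmap[c];           // global column of local column c of Ao
     }
.ve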
4570 4571 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4572 @*/ 4573 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4574 { 4575 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4576 PetscBool flg; 4577 4578 PetscFunctionBegin; 4579 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4580 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4581 if (Ad) *Ad = a->A; 4582 if (Ao) *Ao = a->B; 4583 if (colmap) *colmap = a->garray; 4584 PetscFunctionReturn(PETSC_SUCCESS); 4585 } 4586 4587 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4588 { 4589 PetscInt m, N, i, rstart, nnz, Ii; 4590 PetscInt *indx; 4591 PetscScalar *values; 4592 MatType rootType; 4593 4594 PetscFunctionBegin; 4595 PetscCall(MatGetSize(inmat, &m, &N)); 4596 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4597 PetscInt *dnz, *onz, sum, bs, cbs; 4598 4599 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4600 /* Check sum(n) = N */ 4601 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4602 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4603 4604 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4605 rstart -= m; 4606 4607 MatPreallocateBegin(comm, m, n, dnz, onz); 4608 for (i = 0; i < m; i++) { 4609 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4610 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4611 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4612 } 4613 4614 PetscCall(MatCreate(comm, outmat)); 4615 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4616 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4617 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4618 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4619 PetscCall(MatSetType(*outmat, rootType)); 4620 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4621 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4622 MatPreallocateEnd(dnz, onz); 4623 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4624 } 4625 4626 /* numeric phase */ 4627 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4628 for (i = 0; i < m; i++) { 4629 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4630 Ii = i + rstart; 4631 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4632 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4633 } 4634 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4635 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4636 PetscFunctionReturn(PETSC_SUCCESS); 4637 } 4638 4639 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4640 { 4641 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4642 4643 PetscFunctionBegin; 4644 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4645 PetscCall(PetscFree(merge->id_r)); 4646 PetscCall(PetscFree(merge->len_s)); 4647 PetscCall(PetscFree(merge->len_r)); 4648 PetscCall(PetscFree(merge->bi)); 4649 PetscCall(PetscFree(merge->bj)); 4650 PetscCall(PetscFree(merge->buf_ri[0])); 4651 PetscCall(PetscFree(merge->buf_ri)); 4652 PetscCall(PetscFree(merge->buf_rj[0])); 4653 PetscCall(PetscFree(merge->buf_rj)); 4654 
PetscCall(PetscFree(merge->coi)); 4655 PetscCall(PetscFree(merge->coj)); 4656 PetscCall(PetscFree(merge->owners_co)); 4657 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4658 PetscCall(PetscFree(merge)); 4659 PetscFunctionReturn(PETSC_SUCCESS); 4660 } 4661 4662 #include <../src/mat/utils/freespace.h> 4663 #include <petscbt.h> 4664 4665 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4666 { 4667 MPI_Comm comm; 4668 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4669 PetscMPIInt size, rank, taga, *len_s; 4670 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4671 PetscMPIInt proc, k; 4672 PetscInt **buf_ri, **buf_rj; 4673 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4674 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4675 MPI_Request *s_waits, *r_waits; 4676 MPI_Status *status; 4677 const MatScalar *aa, *a_a; 4678 MatScalar **abuf_r, *ba_i; 4679 Mat_Merge_SeqsToMPI *merge; 4680 PetscContainer container; 4681 4682 PetscFunctionBegin; 4683 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4684 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4685 4686 PetscCallMPI(MPI_Comm_size(comm, &size)); 4687 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4688 4689 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4690 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4691 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4692 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4693 aa = a_a; 4694 4695 bi = merge->bi; 4696 bj = merge->bj; 4697 buf_ri = merge->buf_ri; 4698 buf_rj = merge->buf_rj; 4699 4700 PetscCall(PetscMalloc1(size, &status)); 4701 owners = merge->rowmap->range; 4702 len_s = merge->len_s; 4703 4704 /* send and recv matrix values */ 4705 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4706 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4707 4708 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4709 for (proc = 0, k = 0; proc < size; proc++) { 4710 if (!len_s[proc]) continue; 4711 i = owners[proc]; 4712 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4713 k++; 4714 } 4715 4716 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4717 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4718 PetscCall(PetscFree(status)); 4719 4720 PetscCall(PetscFree(s_waits)); 4721 PetscCall(PetscFree(r_waits)); 4722 4723 /* insert mat values of mpimat */ 4724 PetscCall(PetscMalloc1(N, &ba_i)); 4725 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4726 4727 for (k = 0; k < merge->nrecv; k++) { 4728 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4729 nrows = *buf_ri_k[k]; 4730 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4731 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4732 } 4733 4734 /* set values of ba */ 4735 m = merge->rowmap->n; 4736 for (i = 0; i < m; i++) { 4737 arow = owners[rank] + i; 4738 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4739 bnzi = bi[i + 1] - bi[i]; 4740 PetscCall(PetscArrayzero(ba_i, bnzi)); 4741 4742 /* add local non-zero vals of this proc's seqmat into ba */ 4743 anzi = ai[arow + 1] - ai[arow]; 4744 aj = a->j + ai[arow]; 4745 aa = 
a_a + ai[arow]; 4746 nextaj = 0; 4747 for (j = 0; nextaj < anzi; j++) { 4748 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4749 ba_i[j] += aa[nextaj++]; 4750 } 4751 } 4752 4753 /* add received vals into ba */ 4754 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4755 /* i-th row */ 4756 if (i == *nextrow[k]) { 4757 anzi = *(nextai[k] + 1) - *nextai[k]; 4758 aj = buf_rj[k] + *nextai[k]; 4759 aa = abuf_r[k] + *nextai[k]; 4760 nextaj = 0; 4761 for (j = 0; nextaj < anzi; j++) { 4762 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4763 ba_i[j] += aa[nextaj++]; 4764 } 4765 } 4766 nextrow[k]++; 4767 nextai[k]++; 4768 } 4769 } 4770 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4771 } 4772 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4773 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4774 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4775 4776 PetscCall(PetscFree(abuf_r[0])); 4777 PetscCall(PetscFree(abuf_r)); 4778 PetscCall(PetscFree(ba_i)); 4779 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4780 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4781 PetscFunctionReturn(PETSC_SUCCESS); 4782 } 4783 4784 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4785 { 4786 Mat B_mpi; 4787 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4788 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4789 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4790 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4791 PetscInt len, *dnz, *onz, bs, cbs; 4792 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4793 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4794 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4795 MPI_Status *status; 4796 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4797 PetscBT lnkbt; 4798 Mat_Merge_SeqsToMPI *merge; 4799 PetscContainer container; 4800 4801 PetscFunctionBegin; 4802 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4803 4804 /* make sure it is a PETSc comm */ 4805 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4806 PetscCallMPI(MPI_Comm_size(comm, &size)); 4807 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4808 4809 PetscCall(PetscNew(&merge)); 4810 PetscCall(PetscMalloc1(size, &status)); 4811 4812 /* determine row ownership */ 4813 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4814 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4815 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4816 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4817 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4818 PetscCall(PetscMalloc1(size, &len_si)); 4819 PetscCall(PetscMalloc1(size, &merge->len_s)); 4820 4821 m = merge->rowmap->n; 4822 owners = merge->rowmap->range; 4823 4824 /* determine the number of messages to send, their lengths */ 4825 len_s = merge->len_s; 4826 4827 len = 0; /* length of buf_si[] */ 4828 merge->nsend = 0; 4829 for (PetscMPIInt proc = 0; proc < size; proc++) { 4830 len_si[proc] = 0; 4831 if (proc == rank) { 4832 len_s[proc] = 0; 4833 } else { 4834 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4835 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4836 } 4837 if (len_s[proc]) { 4838 merge->nsend++; 4839 nrows = 0; 4840 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4841 if (ai[i + 1] > ai[i]) nrows++; 4842 } 
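        /* the i-structure message for this proc carries nrows itself, one entry per nonempty local row, and nrows+1 row offsets, hence 2*(nrows+1) integers */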
4843 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4844 len += len_si[proc]; 4845 } 4846 } 4847 4848 /* determine the number and length of messages to receive for ij-structure */ 4849 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4850 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4851 4852 /* post the Irecv of j-structure */ 4853 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4854 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4855 4856 /* post the Isend of j-structure */ 4857 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4858 4859 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4860 if (!len_s[proc]) continue; 4861 i = owners[proc]; 4862 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4863 k++; 4864 } 4865 4866 /* receives and sends of j-structure are complete */ 4867 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4868 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4869 4870 /* send and recv i-structure */ 4871 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4872 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4873 4874 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4875 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4876 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4877 if (!len_s[proc]) continue; 4878 /* form outgoing message for i-structure: 4879 buf_si[0]: nrows to be sent 4880 [1:nrows]: row index (global) 4881 [nrows+1:2*nrows+1]: i-structure index 4882 */ 4883 nrows = len_si[proc] / 2 - 1; 4884 buf_si_i = buf_si + nrows + 1; 4885 buf_si[0] = nrows; 4886 buf_si_i[0] = 0; 4887 nrows = 0; 4888 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4889 anzi = ai[i + 1] - ai[i]; 4890 if (anzi) { 4891 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4892 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4893 nrows++; 4894 } 4895 } 4896 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4897 k++; 4898 buf_si += len_si[proc]; 4899 } 4900 4901 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4902 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4903 4904 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4905 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4906 4907 PetscCall(PetscFree(len_si)); 4908 PetscCall(PetscFree(len_ri)); 4909 PetscCall(PetscFree(rj_waits)); 4910 PetscCall(PetscFree2(si_waits, sj_waits)); 4911 PetscCall(PetscFree(ri_waits)); 4912 PetscCall(PetscFree(buf_s)); 4913 PetscCall(PetscFree(status)); 4914 4915 /* compute a local seq matrix in each processor */ 4916 /* allocate bi array and free space for accumulating nonzero column info */ 4917 PetscCall(PetscMalloc1(m + 1, &bi)); 4918 bi[0] = 0; 4919 4920 /* create and initialize a linked list */ 4921 nlnk = N + 1; 4922 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4923 4924 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4925 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4926 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4927 4928 
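  /* current_space walks the free-space list: for each local row the merged (local plus received) column indices are
     gathered in the sorted linked list lnk, flushed into current_space->array by PetscLLClean(), and counted with
     MatPreallocateSet() to fill dnz/onz */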
current_space = free_space; 4929 4930 /* determine symbolic info for each local row */ 4931 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4932 4933 for (k = 0; k < merge->nrecv; k++) { 4934 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4935 nrows = *buf_ri_k[k]; 4936 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4937 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4938 } 4939 4940 MatPreallocateBegin(comm, m, n, dnz, onz); 4941 len = 0; 4942 for (i = 0; i < m; i++) { 4943 bnzi = 0; 4944 /* add local non-zero cols of this proc's seqmat into lnk */ 4945 arow = owners[rank] + i; 4946 anzi = ai[arow + 1] - ai[arow]; 4947 aj = a->j + ai[arow]; 4948 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4949 bnzi += nlnk; 4950 /* add received col data into lnk */ 4951 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4952 if (i == *nextrow[k]) { /* i-th row */ 4953 anzi = *(nextai[k] + 1) - *nextai[k]; 4954 aj = buf_rj[k] + *nextai[k]; 4955 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4956 bnzi += nlnk; 4957 nextrow[k]++; 4958 nextai[k]++; 4959 } 4960 } 4961 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4962 4963 /* if free space is not available, make more free space */ 4964 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4965 /* copy data into free space, then initialize lnk */ 4966 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4967 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 4968 4969 current_space->array += bnzi; 4970 current_space->local_used += bnzi; 4971 current_space->local_remaining -= bnzi; 4972 4973 bi[i + 1] = bi[i] + bnzi; 4974 } 4975 4976 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4977 4978 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 4979 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4980 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4981 4982 /* create symbolic parallel matrix B_mpi */ 4983 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4984 PetscCall(MatCreate(comm, &B_mpi)); 4985 if (n == PETSC_DECIDE) { 4986 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4987 } else { 4988 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4989 } 4990 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 4991 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 4992 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 4993 MatPreallocateEnd(dnz, onz); 4994 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 4995 4996 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4997 B_mpi->assembled = PETSC_FALSE; 4998 merge->bi = bi; 4999 merge->bj = bj; 5000 merge->buf_ri = buf_ri; 5001 merge->buf_rj = buf_rj; 5002 merge->coi = NULL; 5003 merge->coj = NULL; 5004 merge->owners_co = NULL; 5005 5006 PetscCall(PetscCommDestroy(&comm)); 5007 5008 /* attach the supporting struct to B_mpi for reuse */ 5009 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5010 PetscCall(PetscContainerSetPointer(container, merge)); 5011 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5012 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5013 PetscCall(PetscContainerDestroy(&container)); 5014 
*mpimat = B_mpi; 5015 5016 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5017 PetscFunctionReturn(PETSC_SUCCESS); 5018 } 5019 5020 /*@ 5021 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding the sequential 5022 matrices from each processor 5023 5024 Collective 5025 5026 Input Parameters: 5027 + comm - the communicator the parallel matrix will live on 5028 . seqmat - the input sequential matrix 5029 . m - number of local rows (or `PETSC_DECIDE`) 5030 . n - number of local columns (or `PETSC_DECIDE`) 5031 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5032 5033 Output Parameter: 5034 . mpimat - the parallel matrix generated 5035 5036 Level: advanced 5037 5038 Note: 5039 The dimensions of the sequential matrix in each processor MUST be the same. 5040 The input `seqmat` is included in the container "Mat_Merge_SeqsToMPI", and will be 5041 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5042 5043 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5044 @*/ 5045 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5046 { 5047 PetscMPIInt size; 5048 5049 PetscFunctionBegin; 5050 PetscCallMPI(MPI_Comm_size(comm, &size)); 5051 if (size == 1) { 5052 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5053 if (scall == MAT_INITIAL_MATRIX) { 5054 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5055 } else { 5056 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5057 } 5058 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5059 PetscFunctionReturn(PETSC_SUCCESS); 5060 } 5061 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5062 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5063 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5064 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5065 PetscFunctionReturn(PETSC_SUCCESS); 5066 } 5067 5068 /*@ 5069 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5070 5071 Not Collective 5072 5073 Input Parameter: 5074 . A - the matrix 5075 5076 Output Parameter: 5077 . A_loc - the local sequential matrix generated 5078 5079 Level: developer 5080 5081 Notes: 5082 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5083 with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and 5084 `n` is the global column count obtained with `MatGetSize()` 5085 5086 In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix. 5087 5088 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5089 5090 Destroy the matrix with `MatDestroy()` 5091 5092 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5093 @*/ 5094 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5095 { 5096 PetscBool mpi; 5097 5098 PetscFunctionBegin; 5099 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5100 if (mpi) { 5101 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5102 } else { 5103 *A_loc = A; 5104 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5105 } 5106 PetscFunctionReturn(PETSC_SUCCESS); 5107 } 5108 5109 /*@ 5110 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5111 5112 Not Collective 5113 5114 Input Parameters: 5115 + A - the matrix 5116 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5117 5118 Output Parameter: 5119 . A_loc - the local sequential matrix generated 5120 5121 Level: developer 5122 5123 Notes: 5124 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5125 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5126 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5127 5128 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5129 5130 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5131 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5132 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5133 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5134 5135 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5136 @*/ 5137 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5138 { 5139 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5140 Mat_SeqAIJ *mat, *a, *b; 5141 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5142 const PetscScalar *aa, *ba, *aav, *bav; 5143 PetscScalar *ca, *cam; 5144 PetscMPIInt size; 5145 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5146 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5147 PetscBool match; 5148 5149 PetscFunctionBegin; 5150 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5151 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5152 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5153 if (size == 1) { 5154 if (scall == MAT_INITIAL_MATRIX) { 5155 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5156 *A_loc = mpimat->A; 5157 } else if (scall == MAT_REUSE_MATRIX) { 5158 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5159 } 5160 PetscFunctionReturn(PETSC_SUCCESS); 5161 } 5162 5163 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5164 a = (Mat_SeqAIJ *)mpimat->A->data; 5165 b = (Mat_SeqAIJ *)mpimat->B->data; 5166 ai = a->i; 5167 aj = a->j; 5168 bi = b->i; 5169 bj = b->j; 5170 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5171 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5172 aa = aav; 5173 ba = bav; 5174 if (scall == MAT_INITIAL_MATRIX) { 5175 PetscCall(PetscMalloc1(1 + am, &ci)); 5176 ci[0] = 0; 5177 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5178 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5179 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5180 k = 0; 5181 for (i = 0; i < am; i++) { 5182 ncols_o = bi[i + 1] - bi[i]; 5183 ncols_d = ai[i + 1] - ai[i]; 5184 /* off-diagonal portion of A */ 5185 for (jo = 0; jo < ncols_o; jo++) { 5186 col = cmap[*bj]; 5187 if (col >= cstart) break; 5188 cj[k] = col; 5189 bj++; 5190 ca[k++] = *ba++; 5191 } 5192 /* diagonal portion of A */ 5193 for (j = 0; j < ncols_d; j++) { 5194 cj[k] = cstart + *aj++; 5195 ca[k++] = 
*aa++; 5196 } 5197 /* off-diagonal portion of A */ 5198 for (j = jo; j < ncols_o; j++) { 5199 cj[k] = cmap[*bj++]; 5200 ca[k++] = *ba++; 5201 } 5202 } 5203 /* put together the new matrix */ 5204 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5205 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5206 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5207 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5208 mat->free_a = PETSC_TRUE; 5209 mat->free_ij = PETSC_TRUE; 5210 mat->nonew = 0; 5211 } else if (scall == MAT_REUSE_MATRIX) { 5212 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5213 ci = mat->i; 5214 cj = mat->j; 5215 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5216 for (i = 0; i < am; i++) { 5217 /* off-diagonal portion of A */ 5218 ncols_o = bi[i + 1] - bi[i]; 5219 for (jo = 0; jo < ncols_o; jo++) { 5220 col = cmap[*bj]; 5221 if (col >= cstart) break; 5222 *cam++ = *ba++; 5223 bj++; 5224 } 5225 /* diagonal portion of A */ 5226 ncols_d = ai[i + 1] - ai[i]; 5227 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5228 /* off-diagonal portion of A */ 5229 for (j = jo; j < ncols_o; j++) { 5230 *cam++ = *ba++; 5231 bj++; 5232 } 5233 } 5234 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5235 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5236 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5237 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5238 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5239 PetscFunctionReturn(PETSC_SUCCESS); 5240 } 5241 5242 /*@ 5243 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5244 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5245 5246 Not Collective 5247 5248 Input Parameters: 5249 + A - the matrix 5250 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5251 5252 Output Parameters: 5253 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5254 - A_loc - the local sequential matrix generated 5255 5256 Level: developer 5257 5258 Note: 5259 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5260 part, then those associated with the off-diagonal part (in its local ordering) 5261 5262 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5263 @*/ 5264 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5265 { 5266 Mat Ao, Ad; 5267 const PetscInt *cmap; 5268 PetscMPIInt size; 5269 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5270 5271 PetscFunctionBegin; 5272 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5273 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5274 if (size == 1) { 5275 if (scall == MAT_INITIAL_MATRIX) { 5276 PetscCall(PetscObjectReference((PetscObject)Ad)); 5277 *A_loc = Ad; 5278 } else if (scall == MAT_REUSE_MATRIX) { 5279 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5280 } 5281 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5282 PetscFunctionReturn(PETSC_SUCCESS); 5283 } 5284 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5285 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5286 if (f) { 5287 PetscCall((*f)(A, scall, glob, A_loc)); 5288 } else { 5289 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5290 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5291 Mat_SeqAIJ *c; 5292 PetscInt *ai = a->i, *aj = a->j; 5293 PetscInt *bi = b->i, *bj = b->j; 5294 PetscInt *ci, *cj; 5295 const PetscScalar *aa, *ba; 5296 PetscScalar *ca; 5297 PetscInt i, j, am, dn, on; 5298 5299 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5300 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5301 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5302 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5303 if (scall == MAT_INITIAL_MATRIX) { 5304 PetscInt k; 5305 PetscCall(PetscMalloc1(1 + am, &ci)); 5306 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5307 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5308 ci[0] = 0; 5309 for (i = 0, k = 0; i < am; i++) { 5310 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5311 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5312 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5313 /* diagonal portion of A */ 5314 for (j = 0; j < ncols_d; j++, k++) { 5315 cj[k] = *aj++; 5316 ca[k] = *aa++; 5317 } 5318 /* off-diagonal portion of A */ 5319 for (j = 0; j < ncols_o; j++, k++) { 5320 cj[k] = dn + *bj++; 5321 ca[k] = *ba++; 5322 } 5323 } 5324 /* put together the new matrix */ 5325 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5326 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5327 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5328 c = (Mat_SeqAIJ *)(*A_loc)->data; 5329 c->free_a = PETSC_TRUE; 5330 c->free_ij = PETSC_TRUE; 5331 c->nonew = 0; 5332 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5333 } else if (scall == MAT_REUSE_MATRIX) { 5334 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5335 for (i = 0; i < am; i++) { 5336 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5337 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5338 /* diagonal portion of A */ 5339 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5340 /* off-diagonal portion of A */ 5341 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5342 } 5343 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5344 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5345 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5346 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5347 if (glob) { 5348 PetscInt cst, *gidx; 5349 5350 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5351 PetscCall(PetscMalloc1(dn + on, &gidx)); 5352 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5353 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5354 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5355 } 5356 } 5357 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5358 PetscFunctionReturn(PETSC_SUCCESS); 5359 } 5360 5361 /*@C 5362 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5363 5364 Not Collective 5365 5366 Input Parameters: 5367 + A - the matrix 5368 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5369 . row - index set of rows to extract (or `NULL`) 5370 - col - index set of columns to extract (or `NULL`) 5371 5372 Output Parameter: 5373 . A_loc - the local sequential matrix generated 5374 5375 Level: developer 5376 5377 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5378 @*/ 5379 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5380 { 5381 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5382 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5383 IS isrowa, iscola; 5384 Mat *aloc; 5385 PetscBool match; 5386 5387 PetscFunctionBegin; 5388 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5389 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5390 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5391 if (!row) { 5392 start = A->rmap->rstart; 5393 end = A->rmap->rend; 5394 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5395 } else { 5396 isrowa = *row; 5397 } 5398 if (!col) { 5399 start = A->cmap->rstart; 5400 cmap = a->garray; 5401 nzA = a->A->cmap->n; 5402 nzB = a->B->cmap->n; 5403 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5404 ncols = 0; 5405 for (i = 0; i < nzB; i++) { 5406 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5407 else break; 5408 } 5409 imark = i; 5410 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5411 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5412 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5413 } else { 5414 iscola = *col; 5415 } 5416 if (scall != MAT_INITIAL_MATRIX) { 5417 PetscCall(PetscMalloc1(1, &aloc)); 5418 aloc[0] = *A_loc; 5419 } 5420 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5421 if (!col) { /* attach global id of condensed columns */ 5422 
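    /* iscola holds the global indices of the condensed (nonzero) columns; composing it onto aloc[0] keeps it available after the ISDestroy() below */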
PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5423 } 5424 *A_loc = aloc[0]; 5425 PetscCall(PetscFree(aloc)); 5426 if (!row) PetscCall(ISDestroy(&isrowa)); 5427 if (!col) PetscCall(ISDestroy(&iscola)); 5428 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5429 PetscFunctionReturn(PETSC_SUCCESS); 5430 } 5431 5432 /* 5433 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5434 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5435 * on a global size. 5436 * */ 5437 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5438 { 5439 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5440 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5441 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5442 PetscMPIInt owner; 5443 PetscSFNode *iremote, *oiremote; 5444 const PetscInt *lrowindices; 5445 PetscSF sf, osf; 5446 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5447 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5448 MPI_Comm comm; 5449 ISLocalToGlobalMapping mapping; 5450 const PetscScalar *pd_a, *po_a; 5451 5452 PetscFunctionBegin; 5453 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5454 /* plocalsize is the number of roots 5455 * nrows is the number of leaves 5456 * */ 5457 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5458 PetscCall(ISGetLocalSize(rows, &nrows)); 5459 PetscCall(PetscCalloc1(nrows, &iremote)); 5460 PetscCall(ISGetIndices(rows, &lrowindices)); 5461 for (i = 0; i < nrows; i++) { 5462 /* Find a remote index and an owner for a row 5463 * The row could be local or remote 5464 * */ 5465 owner = 0; 5466 lidx = 0; 5467 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5468 iremote[i].index = lidx; 5469 iremote[i].rank = owner; 5470 } 5471 /* Create SF to communicate how many nonzero columns for each row */ 5472 PetscCall(PetscSFCreate(comm, &sf)); 5473 /* SF will figure out the number of nonzero columns for each row, and their 5474 * offsets 5475 * */ 5476 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5477 PetscCall(PetscSFSetFromOptions(sf)); 5478 PetscCall(PetscSFSetUp(sf)); 5479 5480 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5481 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5482 PetscCall(PetscCalloc1(nrows, &pnnz)); 5483 roffsets[0] = 0; 5484 roffsets[1] = 0; 5485 for (i = 0; i < plocalsize; i++) { 5486 /* diagonal */ 5487 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5488 /* off-diagonal */ 5489 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5490 /* compute offsets so that we relative location for each row */ 5491 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5492 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5493 } 5494 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5495 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5496 /* 'r' means root, and 'l' means leaf */ 5497 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5498 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5499 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5500 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5501 PetscCall(PetscSFDestroy(&sf)); 5502 
PetscCall(PetscFree(roffsets)); 5503 PetscCall(PetscFree(nrcols)); 5504 dntotalcols = 0; 5505 ontotalcols = 0; 5506 ncol = 0; 5507 for (i = 0; i < nrows; i++) { 5508 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5509 ncol = PetscMax(pnnz[i], ncol); 5510 /* diagonal */ 5511 dntotalcols += nlcols[i * 2 + 0]; 5512 /* off-diagonal */ 5513 ontotalcols += nlcols[i * 2 + 1]; 5514 } 5515 /* We do not need to figure the right number of columns 5516 * since all the calculations will be done by going through the raw data 5517 * */ 5518 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5519 PetscCall(MatSetUp(*P_oth)); 5520 PetscCall(PetscFree(pnnz)); 5521 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5522 /* diagonal */ 5523 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5524 /* off-diagonal */ 5525 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5526 /* diagonal */ 5527 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5528 /* off-diagonal */ 5529 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5530 dntotalcols = 0; 5531 ontotalcols = 0; 5532 ntotalcols = 0; 5533 for (i = 0; i < nrows; i++) { 5534 owner = 0; 5535 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5536 /* Set iremote for diag matrix */ 5537 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5538 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5539 iremote[dntotalcols].rank = owner; 5540 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5541 ilocal[dntotalcols++] = ntotalcols++; 5542 } 5543 /* off-diagonal */ 5544 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5545 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5546 oiremote[ontotalcols].rank = owner; 5547 oilocal[ontotalcols++] = ntotalcols++; 5548 } 5549 } 5550 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5551 PetscCall(PetscFree(loffsets)); 5552 PetscCall(PetscFree(nlcols)); 5553 PetscCall(PetscSFCreate(comm, &sf)); 5554 /* P serves as roots and P_oth is leaves 5555 * Diag matrix 5556 * */ 5557 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5558 PetscCall(PetscSFSetFromOptions(sf)); 5559 PetscCall(PetscSFSetUp(sf)); 5560 5561 PetscCall(PetscSFCreate(comm, &osf)); 5562 /* off-diagonal */ 5563 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5564 PetscCall(PetscSFSetFromOptions(osf)); 5565 PetscCall(PetscSFSetUp(osf)); 5566 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5567 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5568 /* operate on the matrix internal data to save memory */ 5569 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5570 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5571 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5572 /* Convert to global indices for diag matrix */ 5573 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5574 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5575 /* We want P_oth store global indices */ 5576 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5577 /* Use memory scalable approach */ 5578 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5579 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5580 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5581 
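  /* complete the j-index broadcasts: pd->j and po->j were temporarily mapped to global indices above and are mapped back to local indices below */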
PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5582 /* Convert back to local indices */ 5583 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5584 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5585 nout = 0; 5586 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5587 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5588 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5589 /* Exchange values */ 5590 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5591 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5592 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5593 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5594 /* Stop PETSc from shrinking memory */ 5595 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5596 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5597 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5598 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5599 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5600 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5601 PetscCall(PetscSFDestroy(&sf)); 5602 PetscCall(PetscSFDestroy(&osf)); 5603 PetscFunctionReturn(PETSC_SUCCESS); 5604 } 5605 5606 /* 5607 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5608 * This supports MPIAIJ and MAIJ 5609 * */ 5610 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5611 { 5612 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5613 Mat_SeqAIJ *p_oth; 5614 IS rows, map; 5615 PetscHMapI hamp; 5616 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5617 MPI_Comm comm; 5618 PetscSF sf, osf; 5619 PetscBool has; 5620 5621 PetscFunctionBegin; 5622 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5623 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5624 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5625 * and then create a submatrix (that often is an overlapping matrix) 5626 * */ 5627 if (reuse == MAT_INITIAL_MATRIX) { 5628 /* Use a hash table to figure out unique keys */ 5629 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5630 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5631 count = 0; 5632 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5633 for (i = 0; i < a->B->cmap->n; i++) { 5634 key = a->garray[i] / dof; 5635 PetscCall(PetscHMapIHas(hamp, key, &has)); 5636 if (!has) { 5637 mapping[i] = count; 5638 PetscCall(PetscHMapISet(hamp, key, count++)); 5639 } else { 5640 /* Current 'i' has the same value the previous step */ 5641 mapping[i] = count - 1; 5642 } 5643 } 5644 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5645 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5646 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5647 PetscCall(PetscCalloc1(htsize, &rowindices)); 5648 off = 0; 5649 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5650 PetscCall(PetscHMapIDestroy(&hamp)); 5651 PetscCall(PetscSortInt(htsize, rowindices)); 5652 PetscCall(ISCreateGeneral(comm, htsize, 
rowindices, PETSC_OWN_POINTER, &rows)); 5653 /* In case, the matrix was already created but users want to recreate the matrix */ 5654 PetscCall(MatDestroy(P_oth)); 5655 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5656 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5657 PetscCall(ISDestroy(&map)); 5658 PetscCall(ISDestroy(&rows)); 5659 } else if (reuse == MAT_REUSE_MATRIX) { 5660 /* If matrix was already created, we simply update values using SF objects 5661 * that as attached to the matrix earlier. 5662 */ 5663 const PetscScalar *pd_a, *po_a; 5664 5665 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5666 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5667 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5668 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5669 /* Update values in place */ 5670 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5671 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5672 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5673 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5674 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5675 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5676 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5677 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5678 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5679 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5680 PetscFunctionReturn(PETSC_SUCCESS); 5681 } 5682 5683 /*@C 5684 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5685 5686 Collective 5687 5688 Input Parameters: 5689 + A - the first matrix in `MATMPIAIJ` format 5690 . B - the second matrix in `MATMPIAIJ` format 5691 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5692 5693 Output Parameters: 5694 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5695 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5696 - B_seq - the sequential matrix generated 5697 5698 Level: developer 5699 5700 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5701 @*/ 5702 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5703 { 5704 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5705 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5706 IS isrowb, iscolb; 5707 Mat *bseq = NULL; 5708 5709 PetscFunctionBegin; 5710 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5711 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5712 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5713 5714 if (scall == MAT_INITIAL_MATRIX) { 5715 start = A->cmap->rstart; 5716 cmap = a->garray; 5717 nzA = a->A->cmap->n; 5718 nzB = a->B->cmap->n; 5719 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5720 ncols = 0; 5721 for (i = 0; i < nzB; i++) { /* row < local row index */ 5722 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5723 else break; 5724 } 5725 imark = i; 5726 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5727 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5728 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5729 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5730 } else { 5731 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5732 isrowb = *rowb; 5733 iscolb = *colb; 5734 PetscCall(PetscMalloc1(1, &bseq)); 5735 bseq[0] = *B_seq; 5736 } 5737 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5738 *B_seq = bseq[0]; 5739 PetscCall(PetscFree(bseq)); 5740 if (!rowb) { 5741 PetscCall(ISDestroy(&isrowb)); 5742 } else { 5743 *rowb = isrowb; 5744 } 5745 if (!colb) { 5746 PetscCall(ISDestroy(&iscolb)); 5747 } else { 5748 *colb = iscolb; 5749 } 5750 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5751 PetscFunctionReturn(PETSC_SUCCESS); 5752 } 5753 5754 /* 5755 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5756 of the OFF-DIAGONAL portion of local A 5757 5758 Collective 5759 5760 Input Parameters: 5761 + A,B - the matrices in `MATMPIAIJ` format 5762 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5763 5764 Output Parameter: 5765 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5766 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5767 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5768 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5769 5770 Developer Note: 5771 This directly accesses information inside the VecScatter associated with the matrix-vector product 5772 for this matrix. This is not desirable.. 
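
    With a single MPI process there is no off-diagonal portion of `A`, so this routine simply returns `*B_oth` = NULL.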
5773 5774 Level: developer 5775 5776 */ 5777 5778 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5779 { 5780 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5781 VecScatter ctx; 5782 MPI_Comm comm; 5783 const PetscMPIInt *rprocs, *sprocs; 5784 PetscMPIInt nrecvs, nsends; 5785 const PetscInt *srow, *rstarts, *sstarts; 5786 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5787 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5788 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5789 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5790 PetscMPIInt size, tag, rank, nreqs; 5791 5792 PetscFunctionBegin; 5793 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5794 PetscCallMPI(MPI_Comm_size(comm, &size)); 5795 5796 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5797 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5798 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5799 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5800 5801 if (size == 1) { 5802 startsj_s = NULL; 5803 bufa_ptr = NULL; 5804 *B_oth = NULL; 5805 PetscFunctionReturn(PETSC_SUCCESS); 5806 } 5807 5808 ctx = a->Mvctx; 5809 tag = ((PetscObject)ctx)->tag; 5810 5811 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5812 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5813 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5814 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5815 PetscCall(PetscMalloc1(nreqs, &reqs)); 5816 rwaits = reqs; 5817 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5818 5819 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5820 if (scall == MAT_INITIAL_MATRIX) { 5821 /* i-array */ 5822 /* post receives */ 5823 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5824 for (i = 0; i < nrecvs; i++) { 5825 rowlen = rvalues + rstarts[i] * rbs; 5826 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5827 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5828 } 5829 5830 /* pack the outgoing message */ 5831 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5832 5833 sstartsj[0] = 0; 5834 rstartsj[0] = 0; 5835 len = 0; /* total length of j or a array to be sent */ 5836 if (nsends) { 5837 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5838 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5839 } 5840 for (i = 0; i < nsends; i++) { 5841 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5842 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5843 for (j = 0; j < nrows; j++) { 5844 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5845 for (l = 0; l < sbs; l++) { 5846 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5847 5848 rowlen[j * sbs + l] = ncols; 5849 5850 len += ncols; 5851 
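          /* len accumulates the total number of nonzeros this rank will send; it is used below to size the bufj[] and bufa[] send buffers */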
PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5852 } 5853 k++; 5854 } 5855 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5856 5857 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5858 } 5859 /* recvs and sends of i-array are completed */ 5860 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5861 PetscCall(PetscFree(svalues)); 5862 5863 /* allocate buffers for sending j and a arrays */ 5864 PetscCall(PetscMalloc1(len + 1, &bufj)); 5865 PetscCall(PetscMalloc1(len + 1, &bufa)); 5866 5867 /* create i-array of B_oth */ 5868 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5869 5870 b_othi[0] = 0; 5871 len = 0; /* total length of j or a array to be received */ 5872 k = 0; 5873 for (i = 0; i < nrecvs; i++) { 5874 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5875 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5876 for (j = 0; j < nrows; j++) { 5877 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5878 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5879 k++; 5880 } 5881 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5882 } 5883 PetscCall(PetscFree(rvalues)); 5884 5885 /* allocate space for j and a arrays of B_oth */ 5886 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5887 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5888 5889 /* j-array */ 5890 /* post receives of j-array */ 5891 for (i = 0; i < nrecvs; i++) { 5892 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5893 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5894 } 5895 5896 /* pack the outgoing message j-array */ 5897 if (nsends) k = sstarts[0]; 5898 for (i = 0; i < nsends; i++) { 5899 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5900 bufJ = bufj + sstartsj[i]; 5901 for (j = 0; j < nrows; j++) { 5902 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5903 for (ll = 0; ll < sbs; ll++) { 5904 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5905 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5906 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5907 } 5908 } 5909 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5910 } 5911 5912 /* recvs and sends of j-array are completed */ 5913 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5914 } else if (scall == MAT_REUSE_MATRIX) { 5915 sstartsj = *startsj_s; 5916 rstartsj = *startsj_r; 5917 bufa = *bufa_ptr; 5918 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5919 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5920 5921 /* a-array */ 5922 /* post receives of a-array */ 5923 for (i = 0; i < nrecvs; i++) { 5924 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5925 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5926 } 5927 5928 /* pack the outgoing message a-array */ 5929 if (nsends) k = sstarts[0]; 5930 for (i = 0; i < nsends; i++) { 5931 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5932 bufA = bufa + sstartsj[i]; 5933 for (j = 0; j < nrows; j++) { 5934 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5935 for (ll = 0; ll < sbs; ll++) { 5936 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5937 for (l = 
0; l < ncols; l++) *bufA++ = vals[l]; 5938 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5939 } 5940 } 5941 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5942 } 5943 /* recvs and sends of a-array are completed */ 5944 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5945 PetscCall(PetscFree(reqs)); 5946 5947 if (scall == MAT_INITIAL_MATRIX) { 5948 Mat_SeqAIJ *b_oth; 5949 5950 /* put together the new matrix */ 5951 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5952 5953 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5954 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5955 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5956 b_oth->free_a = PETSC_TRUE; 5957 b_oth->free_ij = PETSC_TRUE; 5958 b_oth->nonew = 0; 5959 5960 PetscCall(PetscFree(bufj)); 5961 if (!startsj_s || !bufa_ptr) { 5962 PetscCall(PetscFree2(sstartsj, rstartsj)); 5963 PetscCall(PetscFree(bufa_ptr)); 5964 } else { 5965 *startsj_s = sstartsj; 5966 *startsj_r = rstartsj; 5967 *bufa_ptr = bufa; 5968 } 5969 } else if (scall == MAT_REUSE_MATRIX) { 5970 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5971 } 5972 5973 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5974 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5975 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5976 PetscFunctionReturn(PETSC_SUCCESS); 5977 } 5978 5979 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5980 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5981 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5982 #if defined(PETSC_HAVE_MKL_SPARSE) 5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5984 #endif 5985 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5987 #if defined(PETSC_HAVE_ELEMENTAL) 5988 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5989 #endif 5990 #if defined(PETSC_HAVE_SCALAPACK) 5991 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5992 #endif 5993 #if defined(PETSC_HAVE_HYPRE) 5994 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 5995 #endif 5996 #if defined(PETSC_HAVE_CUDA) 5997 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 5998 #endif 5999 #if defined(PETSC_HAVE_HIP) 6000 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6001 #endif 6002 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6003 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6004 #endif 6005 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6006 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6007 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6008 6009 /* 6010 Computes (B'*A')' since computing B*A directly is untenable 6011 6012 n p p 6013 [ ] [ ] [ ] 6014 m [ A ] * n [ B ] = m [ C ] 6015 [ ] [ ] [ ] 6016 
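    i.e., C = A*B is obtained as C = (B^T * A^T)^T: both operands are transposed, the sparse-times-dense
    product B^T * A^T is formed, and the result is transposed back into C.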
6017 */ 6018 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6019 { 6020 Mat At, Bt, Ct; 6021 6022 PetscFunctionBegin; 6023 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6024 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6025 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6026 PetscCall(MatDestroy(&At)); 6027 PetscCall(MatDestroy(&Bt)); 6028 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6029 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6030 PetscCall(MatDestroy(&Ct)); 6031 PetscFunctionReturn(PETSC_SUCCESS); 6032 } 6033 6034 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6035 { 6036 PetscBool cisdense; 6037 6038 PetscFunctionBegin; 6039 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6040 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6041 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6042 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6043 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6044 PetscCall(MatSetUp(C)); 6045 6046 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6047 PetscFunctionReturn(PETSC_SUCCESS); 6048 } 6049 6050 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6051 { 6052 Mat_Product *product = C->product; 6053 Mat A = product->A, B = product->B; 6054 6055 PetscFunctionBegin; 6056 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6057 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6058 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6059 C->ops->productsymbolic = MatProductSymbolic_AB; 6060 PetscFunctionReturn(PETSC_SUCCESS); 6061 } 6062 6063 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6064 { 6065 Mat_Product *product = C->product; 6066 6067 PetscFunctionBegin; 6068 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6069 PetscFunctionReturn(PETSC_SUCCESS); 6070 } 6071 6072 /* 6073 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6074 6075 Input Parameters: 6076 6077 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6078 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6079 6080 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6081 6082 For Set1, j1[] contains column indices of the nonzeros. 6083 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6084 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6085 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6086 6087 Similar for Set2. 6088 6089 This routine merges the two sets of nonzeros row by row and removes repeats. 6090 6091 Output Parameters: (memory is allocated by the caller) 6092 6093 i[],j[]: the CSR of the merged matrix, which has m rows. 6094 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) 
corresponds to imap1[k]-th unique nonzero in the merged matrix. 6095 imap2[]: similar to imap1[], but for Set2. 6096 Note we order nonzeros row-by-row and from left to right. 6097 */ 6098 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6099 { 6100 PetscInt r, m; /* Row index of mat */ 6101 PetscCount t, t1, t2, b1, e1, b2, e2; 6102 6103 PetscFunctionBegin; 6104 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6105 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6106 i[0] = 0; 6107 for (r = 0; r < m; r++) { /* Do row by row merging */ 6108 b1 = rowBegin1[r]; 6109 e1 = rowEnd1[r]; 6110 b2 = rowBegin2[r]; 6111 e2 = rowEnd2[r]; 6112 while (b1 < e1 && b2 < e2) { 6113 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6114 j[t] = j1[b1]; 6115 imap1[t1] = t; 6116 imap2[t2] = t; 6117 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6118 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6119 t1++; 6120 t2++; 6121 t++; 6122 } else if (j1[b1] < j2[b2]) { 6123 j[t] = j1[b1]; 6124 imap1[t1] = t; 6125 b1 += jmap1[t1 + 1] - jmap1[t1]; 6126 t1++; 6127 t++; 6128 } else { 6129 j[t] = j2[b2]; 6130 imap2[t2] = t; 6131 b2 += jmap2[t2 + 1] - jmap2[t2]; 6132 t2++; 6133 t++; 6134 } 6135 } 6136 /* Merge the remaining in either j1[] or j2[] */ 6137 while (b1 < e1) { 6138 j[t] = j1[b1]; 6139 imap1[t1] = t; 6140 b1 += jmap1[t1 + 1] - jmap1[t1]; 6141 t1++; 6142 t++; 6143 } 6144 while (b2 < e2) { 6145 j[t] = j2[b2]; 6146 imap2[t2] = t; 6147 b2 += jmap2[t2 + 1] - jmap2[t2]; 6148 t2++; 6149 t++; 6150 } 6151 PetscCall(PetscIntCast(t, i + r + 1)); 6152 } 6153 PetscFunctionReturn(PETSC_SUCCESS); 6154 } 6155 6156 /* 6157 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6158 6159 Input Parameters: 6160 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6161 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6162 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6163 6164 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6165 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6166 6167 Output Parameters: 6168 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6169 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6170 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6171 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6172 6173 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6174 Atot: number of entries belonging to the diagonal block. 6175 Annz: number of unique nonzeros belonging to the diagonal block. 6176 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. 
Length of Aperm[] is Atot, which counts repeated entries (i.e., the same 'i,j' pair may appear multiple times).
    Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
      is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

  Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

  Aperm[], Bperm[], Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row.
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6211 for (s = k; s < n; s++) 6212 if (i[s] != row) break; 6213 6214 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6215 for (p = k; p < s; p++) { 6216 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6217 } 6218 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6219 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6220 rowBegin[row - rstart] = k; 6221 rowMid[row - rstart] = mid; 6222 rowEnd[row - rstart] = s; 6223 PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N); 6224 6225 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6226 Atot += mid - k; 6227 Btot += s - mid; 6228 6229 /* Count unique nonzeros of this diag row */ 6230 for (p = k; p < mid;) { 6231 col = j[p]; 6232 do { 6233 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6234 p++; 6235 } while (p < mid && j[p] == col); 6236 Annz++; 6237 } 6238 6239 /* Count unique nonzeros of this offdiag row */ 6240 for (p = mid; p < s;) { 6241 col = j[p]; 6242 do { 6243 p++; 6244 } while (p < s && j[p] == col); 6245 Bnnz++; 6246 } 6247 k = s; 6248 } 6249 6250 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6251 PetscCall(PetscMalloc1(Atot, &Aperm)); 6252 PetscCall(PetscMalloc1(Btot, &Bperm)); 6253 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6254 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6255 6256 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6257 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6258 for (r = 0; r < m; r++) { 6259 k = rowBegin[r]; 6260 mid = rowMid[r]; 6261 s = rowEnd[r]; 6262 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6263 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6264 Atot += mid - k; 6265 Btot += s - mid; 6266 6267 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6268 for (p = k; p < mid;) { 6269 col = j[p]; 6270 q = p; 6271 do { 6272 p++; 6273 } while (p < mid && j[p] == col); 6274 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6275 Annz++; 6276 } 6277 6278 for (p = mid; p < s;) { 6279 col = j[p]; 6280 q = p; 6281 do { 6282 p++; 6283 } while (p < s && j[p] == col); 6284 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6285 Bnnz++; 6286 } 6287 } 6288 /* Output */ 6289 *Aperm_ = Aperm; 6290 *Annz_ = Annz; 6291 *Atot_ = Atot; 6292 *Ajmap_ = Ajmap; 6293 *Bperm_ = Bperm; 6294 *Bnnz_ = Bnnz; 6295 *Btot_ = Btot; 6296 *Bjmap_ = Bjmap; 6297 PetscFunctionReturn(PETSC_SUCCESS); 6298 } 6299 6300 /* 6301 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6302 6303 Input Parameters: 6304 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6305 nnz: number of unique nonzeros in the merged matrix 6306 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6307 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6308 6309 Output Parameter: (memory is allocated by the caller) 6310 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6311 6312 Example: 6313 nnz1 = 4 6314 nnz 
= 6 6315 imap = [1,3,4,5] 6316 jmap = [0,3,5,6,7] 6317 then, 6318 jmap_new = [0,0,3,3,5,6,7] 6319 */ 6320 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6321 { 6322 PetscCount k, p; 6323 6324 PetscFunctionBegin; 6325 jmap_new[0] = 0; 6326 p = nnz; /* p loops over jmap_new[] backwards */ 6327 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6328 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6329 } 6330 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6331 PetscFunctionReturn(PETSC_SUCCESS); 6332 } 6333 6334 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6335 { 6336 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6337 6338 PetscFunctionBegin; 6339 PetscCall(PetscSFDestroy(&coo->sf)); 6340 PetscCall(PetscFree(coo->Aperm1)); 6341 PetscCall(PetscFree(coo->Bperm1)); 6342 PetscCall(PetscFree(coo->Ajmap1)); 6343 PetscCall(PetscFree(coo->Bjmap1)); 6344 PetscCall(PetscFree(coo->Aimap2)); 6345 PetscCall(PetscFree(coo->Bimap2)); 6346 PetscCall(PetscFree(coo->Aperm2)); 6347 PetscCall(PetscFree(coo->Bperm2)); 6348 PetscCall(PetscFree(coo->Ajmap2)); 6349 PetscCall(PetscFree(coo->Bjmap2)); 6350 PetscCall(PetscFree(coo->Cperm1)); 6351 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6352 PetscCall(PetscFree(coo)); 6353 PetscFunctionReturn(PETSC_SUCCESS); 6354 } 6355 6356 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6357 { 6358 MPI_Comm comm; 6359 PetscMPIInt rank, size; 6360 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6361 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6362 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6363 PetscContainer container; 6364 MatCOOStruct_MPIAIJ *coo; 6365 6366 PetscFunctionBegin; 6367 PetscCall(PetscFree(mpiaij->garray)); 6368 PetscCall(VecDestroy(&mpiaij->lvec)); 6369 #if defined(PETSC_USE_CTABLE) 6370 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6371 #else 6372 PetscCall(PetscFree(mpiaij->colmap)); 6373 #endif 6374 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6375 mat->assembled = PETSC_FALSE; 6376 mat->was_assembled = PETSC_FALSE; 6377 6378 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6379 PetscCallMPI(MPI_Comm_size(comm, &size)); 6380 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6381 PetscCall(PetscLayoutSetUp(mat->rmap)); 6382 PetscCall(PetscLayoutSetUp(mat->cmap)); 6383 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6384 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6385 PetscCall(MatGetLocalSize(mat, &m, &n)); 6386 PetscCall(MatGetSize(mat, &M, &N)); 6387 6388 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6389 /* entries come first, then local rows, then remote rows. */ 6390 PetscCount n1 = coo_n, *perm1; 6391 PetscInt *i1 = coo_i, *j1 = coo_j; 6392 6393 PetscCall(PetscMalloc1(n1, &perm1)); 6394 for (k = 0; k < n1; k++) perm1[k] = k; 6395 6396 /* Manipulate indices so that entries with negative row or col indices will have smallest 6397 row indices, local entries will have greater but negative row indices, and remote entries 6398 will have positive row indices. 
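     For example, if this rank owns rows [rstart,rend) = [10,20): an entry with a negative row or column index gets row index PETSC_INT_MIN;
     a local entry with row 12 becomes 12 - PETSC_INT_MAX (negative, but greater than PETSC_INT_MIN); a remote entry with row 35 keeps the
     value 35. The sort by row below therefore places ignored entries first, then local rows (still in ascending order), then remote rows.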
6399 */ 6400 for (k = 0; k < n1; k++) { 6401 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6402 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6403 else { 6404 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6405 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6406 } 6407 } 6408 6409 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6410 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6411 6412 /* Advance k to the first entry we need to take care of */ 6413 for (k = 0; k < n1; k++) 6414 if (i1[k] > PETSC_INT_MIN) break; 6415 PetscCount i1start = k; 6416 6417 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6418 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6419 6420 PetscCheck(i1 == NULL || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M); 6421 6422 /* Send remote rows to their owner */ 6423 /* Find which rows should be sent to which remote ranks*/ 6424 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6425 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6426 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6427 const PetscInt *ranges; 6428 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6429 6430 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6431 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6432 for (k = rem; k < n1;) { 6433 PetscMPIInt owner; 6434 PetscInt firstRow, lastRow; 6435 6436 /* Locate a row range */ 6437 firstRow = i1[k]; /* first row of this owner */ 6438 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6439 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6440 6441 /* Find the first index 'p' in [k,n) with i1[p] belonging to next owner */ 6442 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6443 6444 /* All entries in [k,p) belong to this remote owner */ 6445 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6446 PetscMPIInt *sendto2; 6447 PetscInt *nentries2; 6448 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size;

      PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
      PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
      PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
      PetscCall(PetscFree2(sendto, nentries));
      sendto   = sendto2;
      nentries = nentries2;
      maxNsend = maxNsend2;
    }
    sendto[nsend] = owner;
    PetscCall(PetscIntCast(p - k, &nentries[nsend]));
    nsend++;
    k = p;
  }

  /* Build 1st SF to know offsets on remote to send data */
  PetscSF      sf1;
  PetscInt     nroots = 1, nroots2 = 0;
  PetscInt     nleaves = nsend, nleaves2 = 0;
  PetscInt    *offsets;
  PetscSFNode *iremote;

  PetscCall(PetscSFCreate(comm, &sf1));
  PetscCall(PetscMalloc1(nsend, &iremote));
  PetscCall(PetscMalloc1(nsend, &offsets));
  for (k = 0; k < nsend; k++) {
    iremote[k].rank  = sendto[k];
    iremote[k].index = 0;
    nleaves2 += nentries[k];
    PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
  }
  PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* If nroots2 overflowed, it is caught by the checks on offsets[] below */
  PetscCall(PetscSFDestroy(&sf1));
  PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);

  /* Build 2nd SF to send remote COOs to their owner */
  PetscSF sf2;
  nroots  = nroots2;
  nleaves = nleaves2;
  PetscCall(PetscSFCreate(comm, &sf2));
  PetscCall(PetscSFSetFromOptions(sf2));
  PetscCall(PetscMalloc1(nleaves, &iremote));
  p = 0;
  for (k = 0; k < nsend; k++) {
    PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
    for (q = 0; q < nentries[k]; q++, p++) {
      iremote[p].rank = sendto[k];
      PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
    }
  }
  PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));

  /* Send the remote COOs to their owner */
  PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
  PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
  PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
  PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
  PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
  PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
  PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2,
MPIU_INT, j1prem, j2, MPI_REPLACE)); 6515 6516 PetscCall(PetscFree(offsets)); 6517 PetscCall(PetscFree2(sendto, nentries)); 6518 6519 /* Sort received COOs by row along with the permutation array */ 6520 for (k = 0; k < n2; k++) perm2[k] = k; 6521 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6522 6523 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6524 PetscCount *Cperm1; 6525 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6526 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6527 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6528 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6529 6530 /* Support for HYPRE matrices, kind of a hack. 6531 Swap min column with diagonal so that diagonal values will go first */ 6532 PetscBool hypre; 6533 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6534 if (hypre) { 6535 PetscInt *minj; 6536 PetscBT hasdiag; 6537 6538 PetscCall(PetscBTCreate(m, &hasdiag)); 6539 PetscCall(PetscMalloc1(m, &minj)); 6540 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6541 for (k = i1start; k < rem; k++) { 6542 if (j1[k] < cstart || j1[k] >= cend) continue; 6543 const PetscInt rindex = i1[k] - rstart; 6544 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6545 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6546 } 6547 for (k = 0; k < n2; k++) { 6548 if (j2[k] < cstart || j2[k] >= cend) continue; 6549 const PetscInt rindex = i2[k] - rstart; 6550 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6551 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6552 } 6553 for (k = i1start; k < rem; k++) { 6554 const PetscInt rindex = i1[k] - rstart; 6555 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6556 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6557 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6558 } 6559 for (k = 0; k < n2; k++) { 6560 const PetscInt rindex = i2[k] - rstart; 6561 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6562 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6563 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6564 } 6565 PetscCall(PetscBTDestroy(&hasdiag)); 6566 PetscCall(PetscFree(minj)); 6567 } 6568 6569 /* Split local COOs and received COOs into diag/offdiag portions */ 6570 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6571 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6572 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6573 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6574 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6575 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6576 6577 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6578 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6579 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6580 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6581 6582 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6583 PetscInt *Ai, *Bi; 6584 PetscInt *Aj, *Bj; 6585 6586 PetscCall(PetscMalloc1(m + 1, &Ai)); 6587 PetscCall(PetscMalloc1(m + 1, &Bi)); 6588 PetscCall(PetscMalloc1(Annz1 + 
Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6589 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6590 6591 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6592 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6593 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6594 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6595 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6596 6597 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6598 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6599 6600 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6601 /* expect nonzeros in A/B most likely have local contributing entries */ 6602 PetscInt Annz = Ai[m]; 6603 PetscInt Bnnz = Bi[m]; 6604 PetscCount *Ajmap1_new, *Bjmap1_new; 6605 6606 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6607 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6608 6609 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6610 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6611 6612 PetscCall(PetscFree(Aimap1)); 6613 PetscCall(PetscFree(Ajmap1)); 6614 PetscCall(PetscFree(Bimap1)); 6615 PetscCall(PetscFree(Bjmap1)); 6616 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6617 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6618 PetscCall(PetscFree(perm1)); 6619 PetscCall(PetscFree3(i2, j2, perm2)); 6620 6621 Ajmap1 = Ajmap1_new; 6622 Bjmap1 = Bjmap1_new; 6623 6624 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6625 if (Annz < Annz1 + Annz2) { 6626 PetscInt *Aj_new; 6627 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6628 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6629 PetscCall(PetscFree(Aj)); 6630 Aj = Aj_new; 6631 } 6632 6633 if (Bnnz < Bnnz1 + Bnnz2) { 6634 PetscInt *Bj_new; 6635 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6636 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6637 PetscCall(PetscFree(Bj)); 6638 Bj = Bj_new; 6639 } 6640 6641 /* Create new submatrices for on-process and off-process coupling */ 6642 PetscScalar *Aa, *Ba; 6643 MatType rtype; 6644 Mat_SeqAIJ *a, *b; 6645 PetscObjectState state; 6646 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6647 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6648 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6649 if (cstart) { 6650 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6651 } 6652 6653 PetscCall(MatGetRootType_Private(mat, &rtype)); 6654 6655 MatSeqXAIJGetOptions_Private(mpiaij->A); 6656 PetscCall(MatDestroy(&mpiaij->A)); 6657 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6658 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6659 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6660 6661 MatSeqXAIJGetOptions_Private(mpiaij->B); 6662 PetscCall(MatDestroy(&mpiaij->B)); 6663 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6664 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6665 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6666 6667 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6668 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6669 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6670 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, 
PetscObjectComm((PetscObject)mat))); 6671 6672 a = (Mat_SeqAIJ *)mpiaij->A->data; 6673 b = (Mat_SeqAIJ *)mpiaij->B->data; 6674 a->free_a = PETSC_TRUE; 6675 a->free_ij = PETSC_TRUE; 6676 b->free_a = PETSC_TRUE; 6677 b->free_ij = PETSC_TRUE; 6678 a->maxnz = a->nz; 6679 b->maxnz = b->nz; 6680 6681 /* conversion must happen AFTER multiply setup */ 6682 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6683 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6684 PetscCall(VecDestroy(&mpiaij->lvec)); 6685 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6686 6687 // Put the COO struct in a container and then attach that to the matrix 6688 PetscCall(PetscMalloc1(1, &coo)); 6689 coo->n = coo_n; 6690 coo->sf = sf2; 6691 coo->sendlen = nleaves; 6692 coo->recvlen = nroots; 6693 coo->Annz = Annz; 6694 coo->Bnnz = Bnnz; 6695 coo->Annz2 = Annz2; 6696 coo->Bnnz2 = Bnnz2; 6697 coo->Atot1 = Atot1; 6698 coo->Atot2 = Atot2; 6699 coo->Btot1 = Btot1; 6700 coo->Btot2 = Btot2; 6701 coo->Ajmap1 = Ajmap1; 6702 coo->Aperm1 = Aperm1; 6703 coo->Bjmap1 = Bjmap1; 6704 coo->Bperm1 = Bperm1; 6705 coo->Aimap2 = Aimap2; 6706 coo->Ajmap2 = Ajmap2; 6707 coo->Aperm2 = Aperm2; 6708 coo->Bimap2 = Bimap2; 6709 coo->Bjmap2 = Bjmap2; 6710 coo->Bperm2 = Bperm2; 6711 coo->Cperm1 = Cperm1; 6712 // Allocate in preallocation. If not used, it has zero cost on host 6713 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6714 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6715 PetscCall(PetscContainerSetPointer(container, coo)); 6716 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6717 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6718 PetscCall(PetscContainerDestroy(&container)); 6719 PetscFunctionReturn(PETSC_SUCCESS); 6720 } 6721 6722 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6723 { 6724 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6725 Mat A = mpiaij->A, B = mpiaij->B; 6726 PetscScalar *Aa, *Ba; 6727 PetscScalar *sendbuf, *recvbuf; 6728 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6729 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6730 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6731 const PetscCount *Cperm1; 6732 PetscContainer container; 6733 MatCOOStruct_MPIAIJ *coo; 6734 6735 PetscFunctionBegin; 6736 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6737 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6738 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6739 sendbuf = coo->sendbuf; 6740 recvbuf = coo->recvbuf; 6741 Ajmap1 = coo->Ajmap1; 6742 Ajmap2 = coo->Ajmap2; 6743 Aimap2 = coo->Aimap2; 6744 Bjmap1 = coo->Bjmap1; 6745 Bjmap2 = coo->Bjmap2; 6746 Bimap2 = coo->Bimap2; 6747 Aperm1 = coo->Aperm1; 6748 Aperm2 = coo->Aperm2; 6749 Bperm1 = coo->Bperm1; 6750 Bperm2 = coo->Bperm2; 6751 Cperm1 = coo->Cperm1; 6752 6753 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6754 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6755 6756 /* Pack entries to be sent to remote */ 6757 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6758 6759 /* Send remote entries to their owner and overlap the communication with local computation */ 6760 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, 
PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6761 /* Add local entries to A and B */ 6762 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6763 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6764 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6765 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6766 } 6767 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6768 PetscScalar sum = 0.0; 6769 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6770 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6771 } 6772 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6773 6774 /* Add received remote entries to A and B */ 6775 for (PetscCount i = 0; i < coo->Annz2; i++) { 6776 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6777 } 6778 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6779 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6780 } 6781 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6782 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6783 PetscFunctionReturn(PETSC_SUCCESS); 6784 } 6785 6786 /*MC 6787 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6788 6789 Options Database Keys: 6790 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6791 6792 Level: beginner 6793 6794 Notes: 6795 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6796 in this case the values associated with the rows and columns one passes in are set to zero 6797 in the matrix 6798 6799 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 6800 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6801 6802 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6803 M*/ 6804 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6805 { 6806 Mat_MPIAIJ *b; 6807 PetscMPIInt size; 6808 6809 PetscFunctionBegin; 6810 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6811 6812 PetscCall(PetscNew(&b)); 6813 B->data = (void *)b; 6814 B->ops[0] = MatOps_Values; 6815 B->assembled = PETSC_FALSE; 6816 B->insertmode = NOT_SET_VALUES; 6817 b->size = size; 6818 6819 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6820 6821 /* build cache for off array entries formed */ 6822 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6823 6824 b->donotstash = PETSC_FALSE; 6825 b->colmap = NULL; 6826 b->garray = NULL; 6827 b->roworiented = PETSC_TRUE; 6828 6829 /* stuff used for matrix vector multiply */ 6830 b->lvec = NULL; 6831 b->Mvctx = NULL; 6832 6833 /* stuff for MatGetRow() */ 6834 b->rowindices = NULL; 6835 b->rowvalues = NULL; 6836 b->getrowactive = PETSC_FALSE; 6837 6838 /* flexible pointer used in CUSPARSE classes */ 6839 b->spptr = NULL; 6840 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6842 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6844 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6845 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6847 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6848 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6849 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6850 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6851 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6852 #if defined(PETSC_HAVE_CUDA) 6853 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6854 #endif 6855 #if defined(PETSC_HAVE_HIP) 6856 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6857 #endif 6858 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6859 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6860 #endif 6861 #if defined(PETSC_HAVE_MKL_SPARSE) 6862 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6863 #endif 6864 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6865 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6866 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6867 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6868 #if defined(PETSC_HAVE_ELEMENTAL) 6869 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6870 #endif 6871 #if defined(PETSC_HAVE_SCALAPACK) 6872 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6873 #endif 6874 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6875 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6876 #if defined(PETSC_HAVE_HYPRE) 6877 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6878 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6879 #endif 6880 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6881 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6882 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6883 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6884 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6885 PetscFunctionReturn(PETSC_SUCCESS); 6886 } 6887 6888 /*@ 6889 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6890 and "off-diagonal" part of the matrix in CSR format. 6891 6892 Collective 6893 6894 Input Parameters: 6895 + comm - MPI communicator 6896 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6897 . n - This value should be the same as the local size used in creating the 6898 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6899 calculated if `N` is given) For square matrices `n` is almost always `m`. 6900 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6901 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6902 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6903 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6904 . a - matrix values 6905 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6906 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6907 - oa - matrix values 6908 6909 Output Parameter: 6910 . mat - the matrix 6911 6912 Level: advanced 6913 6914 Notes: 6915 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6916 must free the arrays once the matrix has been destroyed and not before. 
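
  For example, here is a minimal sketch (the array contents are purely illustrative) for a 4 x 4 matrix on two MPI processes,
  written for the rank that owns rows 0-1 and columns 0-1; the other rank passes the analogous arrays for its own rows:
.vb
  PetscInt    i[]  = {0, 1, 2}; // diagonal block: one entry in each of the two local rows
  PetscInt    j[]  = {0, 1};    // local (0-based) column indices of the diagonal block
  PetscScalar a[]  = {1.0, 2.0};
  PetscInt    oi[] = {0, 1, 1}; // off-diagonal block: one entry in row 0, none in row 1
  PetscInt    oj[] = {2};       // global column indices of the off-diagonal block
  PetscScalar oa[] = {5.0};
  Mat         A;

  PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, 4, 4, i, j, a, oi, oj, oa, &A));
  // ... use A; the arrays above must remain valid until A has been destroyed ...
.ve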
6917 6918 The `i` and `j` indices are 0 based 6919 6920 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6921 6922 This sets local rows and cannot be used to set off-processor values. 6923 6924 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6925 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6926 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6927 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6928 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6929 communication if it is known that only local entries will be set. 6930 6931 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6932 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6933 @*/ 6934 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6935 { 6936 Mat_MPIAIJ *maij; 6937 6938 PetscFunctionBegin; 6939 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6940 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6941 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6942 PetscCall(MatCreate(comm, mat)); 6943 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6944 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6945 maij = (Mat_MPIAIJ *)(*mat)->data; 6946 6947 (*mat)->preallocated = PETSC_TRUE; 6948 6949 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6950 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6951 6952 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6953 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6954 6955 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6956 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6957 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6958 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6959 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6960 PetscFunctionReturn(PETSC_SUCCESS); 6961 } 6962 6963 typedef struct { 6964 Mat *mp; /* intermediate products */ 6965 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6966 PetscInt cp; /* number of intermediate products */ 6967 6968 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6969 PetscInt *startsj_s, *startsj_r; 6970 PetscScalar *bufa; 6971 Mat P_oth; 6972 6973 /* may take advantage of merging product->B */ 6974 Mat Bloc; /* B-local by merging diag and off-diag */ 6975 6976 /* cusparse does not have support to split between symbolic and numeric phases. 
6977 When api_user is true, we don't need to update the numerical values 6978 of the temporary storage */ 6979 PetscBool reusesym; 6980 6981 /* support for COO values insertion */ 6982 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6983 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6984 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6985 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6986 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6987 PetscMemType mtype; 6988 6989 /* customization */ 6990 PetscBool abmerge; 6991 PetscBool P_oth_bind; 6992 } MatMatMPIAIJBACKEND; 6993 6994 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6995 { 6996 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6997 PetscInt i; 6998 6999 PetscFunctionBegin; 7000 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7001 PetscCall(PetscFree(mmdata->bufa)); 7002 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7003 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7004 PetscCall(MatDestroy(&mmdata->P_oth)); 7005 PetscCall(MatDestroy(&mmdata->Bloc)); 7006 PetscCall(PetscSFDestroy(&mmdata->sf)); 7007 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7008 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7009 PetscCall(PetscFree(mmdata->own[0])); 7010 PetscCall(PetscFree(mmdata->own)); 7011 PetscCall(PetscFree(mmdata->off[0])); 7012 PetscCall(PetscFree(mmdata->off)); 7013 PetscCall(PetscFree(mmdata)); 7014 PetscFunctionReturn(PETSC_SUCCESS); 7015 } 7016 7017 /* Copy selected n entries with indices in idx[] of A to v[]. 
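   For example (illustration only), n = 2 with idx[] = {0, 3} copies the first and fourth stored values of A's data array into v[0] and v[1].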
7018 If idx is NULL, copy the whole data array of A to v[] 7019 */ 7020 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7021 { 7022 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7023 7024 PetscFunctionBegin; 7025 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7026 if (f) { 7027 PetscCall((*f)(A, n, idx, v)); 7028 } else { 7029 const PetscScalar *vv; 7030 7031 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7032 if (n && idx) { 7033 PetscScalar *w = v; 7034 const PetscInt *oi = idx; 7035 PetscInt j; 7036 7037 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7038 } else { 7039 PetscCall(PetscArraycpy(v, vv, n)); 7040 } 7041 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7042 } 7043 PetscFunctionReturn(PETSC_SUCCESS); 7044 } 7045 7046 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7047 { 7048 MatMatMPIAIJBACKEND *mmdata; 7049 PetscInt i, n_d, n_o; 7050 7051 PetscFunctionBegin; 7052 MatCheckProduct(C, 1); 7053 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7054 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7055 if (!mmdata->reusesym) { /* update temporary matrices */ 7056 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7057 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7058 } 7059 mmdata->reusesym = PETSC_FALSE; 7060 7061 for (i = 0; i < mmdata->cp; i++) { 7062 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7063 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7064 } 7065 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7066 PetscInt noff; 7067 7068 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7069 if (mmdata->mptmp[i]) continue; 7070 if (noff) { 7071 PetscInt nown; 7072 7073 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7074 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7075 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7076 n_o += noff; 7077 n_d += nown; 7078 } else { 7079 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7080 7081 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7082 n_d += mm->nz; 7083 } 7084 } 7085 if (mmdata->hasoffproc) { /* offprocess insertion */ 7086 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7087 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7088 } 7089 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7090 PetscFunctionReturn(PETSC_SUCCESS); 7091 } 7092 7093 /* Support for Pt * A, A * P, or Pt * A * P */ 7094 #define MAX_NUMBER_INTERMEDIATE 4 7095 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7096 { 7097 Mat_Product *product = C->product; 7098 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7099 Mat_MPIAIJ *a, *p; 7100 MatMatMPIAIJBACKEND *mmdata; 7101 ISLocalToGlobalMapping P_oth_l2g = NULL; 7102 IS glob = NULL; 7103 const char *prefix; 7104 char pprefix[256]; 7105 const PetscInt *globidx, *P_oth_idx; 7106 PetscInt i, j, cp, m, 
n, M, N, *coo_i, *coo_j; 7107 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7108 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7109 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7110 /* a base offset; type-2: sparse with a local to global map table */ 7111 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7112 7113 MatProductType ptype; 7114 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7115 PetscMPIInt size; 7116 7117 PetscFunctionBegin; 7118 MatCheckProduct(C, 1); 7119 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7120 ptype = product->type; 7121 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7122 ptype = MATPRODUCT_AB; 7123 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7124 } 7125 switch (ptype) { 7126 case MATPRODUCT_AB: 7127 A = product->A; 7128 P = product->B; 7129 m = A->rmap->n; 7130 n = P->cmap->n; 7131 M = A->rmap->N; 7132 N = P->cmap->N; 7133 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7134 break; 7135 case MATPRODUCT_AtB: 7136 P = product->A; 7137 A = product->B; 7138 m = P->cmap->n; 7139 n = A->cmap->n; 7140 M = P->cmap->N; 7141 N = A->cmap->N; 7142 hasoffproc = PETSC_TRUE; 7143 break; 7144 case MATPRODUCT_PtAP: 7145 A = product->A; 7146 P = product->B; 7147 m = P->cmap->n; 7148 n = P->cmap->n; 7149 M = P->cmap->N; 7150 N = P->cmap->N; 7151 hasoffproc = PETSC_TRUE; 7152 break; 7153 default: 7154 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7155 } 7156 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7157 if (size == 1) hasoffproc = PETSC_FALSE; 7158 7159 /* defaults */ 7160 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7161 mp[i] = NULL; 7162 mptmp[i] = PETSC_FALSE; 7163 rmapt[i] = -1; 7164 cmapt[i] = -1; 7165 rmapa[i] = NULL; 7166 cmapa[i] = NULL; 7167 } 7168 7169 /* customization */ 7170 PetscCall(PetscNew(&mmdata)); 7171 mmdata->reusesym = product->api_user; 7172 if (ptype == MATPRODUCT_AB) { 7173 if (product->api_user) { 7174 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7175 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7176 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7177 PetscOptionsEnd(); 7178 } else { 7179 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7180 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7181 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7182 PetscOptionsEnd(); 7183 } 7184 } else if (ptype == MATPRODUCT_PtAP) { 7185 if (product->api_user) { 7186 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7187 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, 
&mmdata->P_oth_bind, NULL)); 7188 PetscOptionsEnd(); 7189 } else { 7190 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7191 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7192 PetscOptionsEnd(); 7193 } 7194 } 7195 a = (Mat_MPIAIJ *)A->data; 7196 p = (Mat_MPIAIJ *)P->data; 7197 PetscCall(MatSetSizes(C, m, n, M, N)); 7198 PetscCall(PetscLayoutSetUp(C->rmap)); 7199 PetscCall(PetscLayoutSetUp(C->cmap)); 7200 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7201 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7202 7203 cp = 0; 7204 switch (ptype) { 7205 case MATPRODUCT_AB: /* A * P */ 7206 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7207 7208 /* A_diag * P_local (merged or not) */ 7209 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7210 /* P is product->B */ 7211 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7212 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7213 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7214 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7215 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7216 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7217 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7218 mp[cp]->product->api_user = product->api_user; 7219 PetscCall(MatProductSetFromOptions(mp[cp])); 7220 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7221 PetscCall(ISGetIndices(glob, &globidx)); 7222 rmapt[cp] = 1; 7223 cmapt[cp] = 2; 7224 cmapa[cp] = globidx; 7225 mptmp[cp] = PETSC_FALSE; 7226 cp++; 7227 } else { /* A_diag * P_diag and A_diag * P_off */ 7228 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7229 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7230 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7231 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7232 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7233 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7234 mp[cp]->product->api_user = product->api_user; 7235 PetscCall(MatProductSetFromOptions(mp[cp])); 7236 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7237 rmapt[cp] = 1; 7238 cmapt[cp] = 1; 7239 mptmp[cp] = PETSC_FALSE; 7240 cp++; 7241 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7242 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7243 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7244 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7245 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7246 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7247 mp[cp]->product->api_user = product->api_user; 7248 PetscCall(MatProductSetFromOptions(mp[cp])); 7249 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7250 rmapt[cp] = 1; 7251 cmapt[cp] = 2; 7252 cmapa[cp] = p->garray; 7253 mptmp[cp] = PETSC_FALSE; 7254 cp++; 7255 } 7256 7257 /* A_off * P_other */ 7258 if (mmdata->P_oth) { 7259 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7260 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7261 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7262 
PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7263 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7264 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7265 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7266 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7267 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7268 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7269 mp[cp]->product->api_user = product->api_user; 7270 PetscCall(MatProductSetFromOptions(mp[cp])); 7271 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7272 rmapt[cp] = 1; 7273 cmapt[cp] = 2; 7274 cmapa[cp] = P_oth_idx; 7275 mptmp[cp] = PETSC_FALSE; 7276 cp++; 7277 } 7278 break; 7279 7280 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7281 /* A is product->B */ 7282 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7283 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7284 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7285 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7286 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7287 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7288 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7289 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7290 mp[cp]->product->api_user = product->api_user; 7291 PetscCall(MatProductSetFromOptions(mp[cp])); 7292 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7293 PetscCall(ISGetIndices(glob, &globidx)); 7294 rmapt[cp] = 2; 7295 rmapa[cp] = globidx; 7296 cmapt[cp] = 2; 7297 cmapa[cp] = globidx; 7298 mptmp[cp] = PETSC_FALSE; 7299 cp++; 7300 } else { 7301 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7302 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7303 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7304 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7305 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7306 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7307 mp[cp]->product->api_user = product->api_user; 7308 PetscCall(MatProductSetFromOptions(mp[cp])); 7309 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7310 PetscCall(ISGetIndices(glob, &globidx)); 7311 rmapt[cp] = 1; 7312 cmapt[cp] = 2; 7313 cmapa[cp] = globidx; 7314 mptmp[cp] = PETSC_FALSE; 7315 cp++; 7316 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7317 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7318 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7319 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7320 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7321 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7322 mp[cp]->product->api_user = product->api_user; 7323 PetscCall(MatProductSetFromOptions(mp[cp])); 7324 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7325 rmapt[cp] = 2; 7326 rmapa[cp] = p->garray; 7327 cmapt[cp] = 2; 7328 cmapa[cp] = globidx; 7329 mptmp[cp] = PETSC_FALSE; 7330 cp++; 7331 } 7332 break; 7333 case MATPRODUCT_PtAP: 7334 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7335 /* P is product->B */ 7336 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7337 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7338 
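    /* local contribution: a PtAP product of the sequential blocks, i.e. Bloc^T * A_diag * Bloc, with Bloc the merged local rows of P created above */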
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7339 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7340 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7341 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7342 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7343 mp[cp]->product->api_user = product->api_user; 7344 PetscCall(MatProductSetFromOptions(mp[cp])); 7345 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7346 PetscCall(ISGetIndices(glob, &globidx)); 7347 rmapt[cp] = 2; 7348 rmapa[cp] = globidx; 7349 cmapt[cp] = 2; 7350 cmapa[cp] = globidx; 7351 mptmp[cp] = PETSC_FALSE; 7352 cp++; 7353 if (mmdata->P_oth) { 7354 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7355 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7356 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7357 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7358 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7359 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7360 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7361 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7362 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7363 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7364 mp[cp]->product->api_user = product->api_user; 7365 PetscCall(MatProductSetFromOptions(mp[cp])); 7366 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7367 mptmp[cp] = PETSC_TRUE; 7368 cp++; 7369 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7370 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7371 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7372 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7373 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7374 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7375 mp[cp]->product->api_user = product->api_user; 7376 PetscCall(MatProductSetFromOptions(mp[cp])); 7377 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7378 rmapt[cp] = 2; 7379 rmapa[cp] = globidx; 7380 cmapt[cp] = 2; 7381 cmapa[cp] = P_oth_idx; 7382 mptmp[cp] = PETSC_FALSE; 7383 cp++; 7384 } 7385 break; 7386 default: 7387 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7388 } 7389 /* sanity check */ 7390 if (size > 1) 7391 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7392 7393 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7394 for (i = 0; i < cp; i++) { 7395 mmdata->mp[i] = mp[i]; 7396 mmdata->mptmp[i] = mptmp[i]; 7397 } 7398 mmdata->cp = cp; 7399 C->product->data = mmdata; 7400 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7401 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7402 7403 /* memory type */ 7404 mmdata->mtype = PETSC_MEMTYPE_HOST; 7405 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7406 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7407 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7408 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7409 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7410 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7411 7412 /* prepare coo 
coordinates for values insertion */ 7413 7414 /* count total nonzeros of those intermediate seqaij Mats 7415 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7416 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7417 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7418 */ 7419 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7420 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7421 if (mptmp[cp]) continue; 7422 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */ 7423 const PetscInt *rmap = rmapa[cp]; 7424 const PetscInt mr = mp[cp]->rmap->n; 7425 const PetscInt rs = C->rmap->rstart; 7426 const PetscInt re = C->rmap->rend; 7427 const PetscInt *ii = mm->i; 7428 for (i = 0; i < mr; i++) { 7429 const PetscInt gr = rmap[i]; 7430 const PetscInt nz = ii[i + 1] - ii[i]; 7431 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7432 else ncoo_oown += nz; /* this row is local */ 7433 } 7434 } else ncoo_d += mm->nz; 7435 } 7436 7437 /* 7438 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7439 7440 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs. 7442 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7444 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7445 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7446 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7448 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7449 E.g., coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
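     As a sketch of the matching value buffers used by MatProductNumeric_MPIAIJBACKEND():
       coo_v[] = [ values this rank inserts itself (ncoo_d + ncoo_oown) | values received from other ranks (ncoo2) ]
       coo_w[] = [ values this rank sends to other ranks (ncoo_o) ], used as the MPI send buffer of the gather.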
7450 */ 7451 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7452 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7453 7454 /* gather (i,j) of nonzeros inserted by remote procs */ 7455 if (hasoffproc) { 7456 PetscSF msf; 7457 PetscInt ncoo2, *coo_i2, *coo_j2; 7458 7459 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7460 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7461 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7462 7463 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7464 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7465 PetscInt *idxoff = mmdata->off[cp]; 7466 PetscInt *idxown = mmdata->own[cp]; 7467 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7468 const PetscInt *rmap = rmapa[cp]; 7469 const PetscInt *cmap = cmapa[cp]; 7470 const PetscInt *ii = mm->i; 7471 PetscInt *coi = coo_i + ncoo_o; 7472 PetscInt *coj = coo_j + ncoo_o; 7473 const PetscInt mr = mp[cp]->rmap->n; 7474 const PetscInt rs = C->rmap->rstart; 7475 const PetscInt re = C->rmap->rend; 7476 const PetscInt cs = C->cmap->rstart; 7477 for (i = 0; i < mr; i++) { 7478 const PetscInt *jj = mm->j + ii[i]; 7479 const PetscInt gr = rmap[i]; 7480 const PetscInt nz = ii[i + 1] - ii[i]; 7481 if (gr < rs || gr >= re) { /* this is an offproc row */ 7482 for (j = ii[i]; j < ii[i + 1]; j++) { 7483 *coi++ = gr; 7484 *idxoff++ = j; 7485 } 7486 if (!cmapt[cp]) { /* already global */ 7487 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7488 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7489 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7490 } else { /* offdiag */ 7491 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7492 } 7493 ncoo_o += nz; 7494 } else { /* this is a local row */ 7495 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7496 } 7497 } 7498 } 7499 mmdata->off[cp + 1] = idxoff; 7500 mmdata->own[cp + 1] = idxown; 7501 } 7502 7503 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7504 PetscInt incoo_o; 7505 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7506 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7507 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7508 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7509 ncoo = ncoo_d + ncoo_oown + ncoo2; 7510 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7511 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7512 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7513 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7514 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7515 PetscCall(PetscFree2(coo_i, coo_j)); 7516 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7517 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7518 coo_i = coo_i2; 7519 coo_j = coo_j2; 7520 } else { /* no offproc values insertion */ 7521 ncoo = ncoo_d; 7522 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7523 7524 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7525 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7526 PetscCall(PetscSFSetUp(mmdata->sf)); 7527 } 7528 
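  /* coo_i/coo_j now have room for all ncoo entries; remote (i,j) pairs, if any, already sit at the back, and the loop below fills the locally inserted entries at the front */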
mmdata->hasoffproc = hasoffproc; 7529 7530 /* gather (i,j) of nonzeros inserted locally */ 7531 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7532 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7533 PetscInt *coi = coo_i + ncoo_d; 7534 PetscInt *coj = coo_j + ncoo_d; 7535 const PetscInt *jj = mm->j; 7536 const PetscInt *ii = mm->i; 7537 const PetscInt *cmap = cmapa[cp]; 7538 const PetscInt *rmap = rmapa[cp]; 7539 const PetscInt mr = mp[cp]->rmap->n; 7540 const PetscInt rs = C->rmap->rstart; 7541 const PetscInt re = C->rmap->rend; 7542 const PetscInt cs = C->cmap->rstart; 7543 7544 if (mptmp[cp]) continue; 7545 if (rmapt[cp] == 1) { /* consecutive rows */ 7546 /* fill coo_i */ 7547 for (i = 0; i < mr; i++) { 7548 const PetscInt gr = i + rs; 7549 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7550 } 7551 /* fill coo_j */ 7552 if (!cmapt[cp]) { /* type-0, already global */ 7553 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7554 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7555 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7556 } else { /* type-2, local to global for sparse columns */ 7557 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7558 } 7559 ncoo_d += mm->nz; 7560 } else if (rmapt[cp] == 2) { /* sparse rows */ 7561 for (i = 0; i < mr; i++) { 7562 const PetscInt *jj = mm->j + ii[i]; 7563 const PetscInt gr = rmap[i]; 7564 const PetscInt nz = ii[i + 1] - ii[i]; 7565 if (gr >= rs && gr < re) { /* local rows */ 7566 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7567 if (!cmapt[cp]) { /* type-0, already global */ 7568 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7569 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7570 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7571 } else { /* type-2, local to global for sparse columns */ 7572 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7573 } 7574 ncoo_d += nz; 7575 } 7576 } 7577 } 7578 } 7579 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7580 PetscCall(ISDestroy(&glob)); 7581 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7582 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7583 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7584 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7585 7586 /* set block sizes */ 7587 A = product->A; 7588 P = product->B; 7589 switch (ptype) { 7590 case MATPRODUCT_PtAP: 7591 PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs)); 7592 break; 7593 case MATPRODUCT_RARt: 7594 PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs)); 7595 break; 7596 case MATPRODUCT_ABC: 7597 PetscCall(MatSetBlockSizesFromMats(C, A, product->C)); 7598 break; 7599 case MATPRODUCT_AB: 7600 PetscCall(MatSetBlockSizesFromMats(C, A, P)); 7601 break; 7602 case MATPRODUCT_AtB: 7603 PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs)); 7604 break; 7605 case MATPRODUCT_ABt: 7606 PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs)); 7607 break; 7608 default: 7609 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]); 7610 } 7611 7612 /* preallocate with COO data */ 7613 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7614 PetscCall(PetscFree2(coo_i, coo_j)); 7615 PetscFunctionReturn(PETSC_SUCCESS); 7616 } 7617 7618 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7619 { 7620 Mat_Product *product = 
mat->product; 7621 #if defined(PETSC_HAVE_DEVICE) 7622 PetscBool match = PETSC_FALSE; 7623 PetscBool usecpu = PETSC_FALSE; 7624 #else 7625 PetscBool match = PETSC_TRUE; 7626 #endif 7627 7628 PetscFunctionBegin; 7629 MatCheckProduct(mat, 1); 7630 #if defined(PETSC_HAVE_DEVICE) 7631 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7632 if (match) { /* we can always fallback to the CPU if requested */ 7633 switch (product->type) { 7634 case MATPRODUCT_AB: 7635 if (product->api_user) { 7636 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7637 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7638 PetscOptionsEnd(); 7639 } else { 7640 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7641 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7642 PetscOptionsEnd(); 7643 } 7644 break; 7645 case MATPRODUCT_AtB: 7646 if (product->api_user) { 7647 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7648 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7649 PetscOptionsEnd(); 7650 } else { 7651 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7652 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7653 PetscOptionsEnd(); 7654 } 7655 break; 7656 case MATPRODUCT_PtAP: 7657 if (product->api_user) { 7658 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7659 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7660 PetscOptionsEnd(); 7661 } else { 7662 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7663 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7664 PetscOptionsEnd(); 7665 } 7666 break; 7667 default: 7668 break; 7669 } 7670 match = (PetscBool)!usecpu; 7671 } 7672 #endif 7673 if (match) { 7674 switch (product->type) { 7675 case MATPRODUCT_AB: 7676 case MATPRODUCT_AtB: 7677 case MATPRODUCT_PtAP: 7678 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7679 break; 7680 default: 7681 break; 7682 } 7683 } 7684 /* fallback to MPIAIJ ops */ 7685 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7686 PetscFunctionReturn(PETSC_SUCCESS); 7687 } 7688 7689 /* 7690 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7691 7692 n - the number of block indices in cc[] 7693 cc - the block indices (must be large enough to contain the indices) 7694 */ 7695 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7696 { 7697 PetscInt cnt = -1, nidx, j; 7698 const PetscInt *idx; 7699 7700 PetscFunctionBegin; 7701 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7702 if (nidx) { 7703 cnt = 0; 7704 cc[cnt] = idx[0] / bs; 7705 for (j = 1; j < nidx; j++) { 7706 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7707 } 7708 } 7709 
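  /* illustration: with bs = 2 and a row whose (sorted) column indices are {0, 1, 4, 5, 7}, cc[] becomes {0, 2, 3} and *n is set to 3 below */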
PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7710 *n = cnt + 1; 7711 PetscFunctionReturn(PETSC_SUCCESS); 7712 } 7713 7714 /* 7715 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7716 7717 ncollapsed - the number of block indices 7718 collapsed - the block indices (must be large enough to contain the indices) 7719 */ 7720 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7721 { 7722 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7723 7724 PetscFunctionBegin; 7725 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7726 for (i = start + 1; i < start + bs; i++) { 7727 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7728 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7729 cprevtmp = cprev; 7730 cprev = merged; 7731 merged = cprevtmp; 7732 } 7733 *ncollapsed = nprev; 7734 if (collapsed) *collapsed = cprev; 7735 PetscFunctionReturn(PETSC_SUCCESS); 7736 } 7737 7738 /* 7739 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7740 7741 Input Parameter: 7742 . Amat - matrix 7743 - symmetrize - make the result symmetric 7744 + scale - scale with diagonal 7745 7746 Output Parameter: 7747 . a_Gmat - output scalar graph >= 0 7748 7749 */ 7750 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7751 { 7752 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7753 MPI_Comm comm; 7754 Mat Gmat; 7755 PetscBool ismpiaij, isseqaij; 7756 Mat a, b, c; 7757 MatType jtype; 7758 7759 PetscFunctionBegin; 7760 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7761 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7762 PetscCall(MatGetSize(Amat, &MM, &NN)); 7763 PetscCall(MatGetBlockSize(Amat, &bs)); 7764 nloc = (Iend - Istart) / bs; 7765 7766 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7767 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7768 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7769 7770 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7771 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7772 implementation */ 7773 if (bs > 1) { 7774 PetscCall(MatGetType(Amat, &jtype)); 7775 PetscCall(MatCreate(comm, &Gmat)); 7776 PetscCall(MatSetType(Gmat, jtype)); 7777 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7778 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7779 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7780 PetscInt *d_nnz, *o_nnz; 7781 MatScalar *aa, val, *AA; 7782 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7783 7784 if (isseqaij) { 7785 a = Amat; 7786 b = NULL; 7787 } else { 7788 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7789 a = d->A; 7790 b = d->B; 7791 } 7792 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7793 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7794 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7795 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7796 const PetscInt *cols1, *cols2; 7797 7798 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7799 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7800 nnz[brow / bs] = nc2 / bs; 7801 if (nc2 % bs) ok = 0; 7802 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7803 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7804 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7805 if (nc1 != nc2) ok = 0; 7806 else { 7807 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7808 if (cols1[jj] != cols2[jj]) ok = 0; 7809 if (cols1[jj] % bs != jj % bs) ok = 0; 7810 } 7811 } 7812 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7813 } 7814 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7815 if (!ok) { 7816 PetscCall(PetscFree2(d_nnz, o_nnz)); 7817 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7818 goto old_bs; 7819 } 7820 } 7821 } 7822 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7823 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7824 PetscCall(PetscFree2(d_nnz, o_nnz)); 7825 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7826 // diag 7827 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7828 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7829 7830 ai = aseq->i; 7831 n = ai[brow + 1] - ai[brow]; 7832 aj = aseq->j + ai[brow]; 7833 for (PetscInt k = 0; k < n; k += bs) { // block columns 7834 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7835 val = 0; 7836 if (index_size == 0) { 7837 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7838 aa = aseq->a + ai[brow + ii] + k; 7839 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7840 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7841 } 7842 } 7843 } else { // use (index,index) value if provided 7844 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7845 PetscInt ii = index[iii]; 7846 aa = aseq->a + ai[brow + ii] + k; 7847 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7848 PetscInt jj = index[jjj]; 7849 val += PetscAbs(PetscRealPart(aa[jj])); 7850 } 7851 } 7852 } 7853 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7854 AA[k / bs] = val; 7855 } 7856 grow = Istart / bs + brow / bs; 7857 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7858 } 7859 // off-diag 7860 if (ismpiaij) { 7861 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7862 const PetscScalar *vals; 7863 const PetscInt *cols, *garray = aij->garray; 7864 7865 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7866 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7867 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7868 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7869 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7870 AA[k / bs] = 0; 7871 AJ[cidx] = garray[cols[k]] / bs; 7872 } 7873 nc = ncols / bs; 7874 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7875 if (index_size == 0) { 7876 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7877 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7878 for (PetscInt k = 0; k < ncols; k += bs) { 7879 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7880 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7881 AA[k / bs] 
+= PetscAbs(PetscRealPart(vals[k + jj])); 7882 } 7883 } 7884 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7885 } 7886 } else { // use (index,index) value if provided 7887 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7888 PetscInt ii = index[iii]; 7889 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7890 for (PetscInt k = 0; k < ncols; k += bs) { 7891 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7892 PetscInt jj = index[jjj]; 7893 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7894 } 7895 } 7896 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7897 } 7898 } 7899 grow = Istart / bs + brow / bs; 7900 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7901 } 7902 } 7903 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7904 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7905 PetscCall(PetscFree2(AA, AJ)); 7906 } else { 7907 const PetscScalar *vals; 7908 const PetscInt *idx; 7909 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7910 old_bs: 7911 /* 7912 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7913 */ 7914 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7915 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7916 if (isseqaij) { 7917 PetscInt max_d_nnz; 7918 7919 /* 7920 Determine exact preallocation count for (sequential) scalar matrix 7921 */ 7922 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7923 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7924 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7925 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7926 PetscCall(PetscFree3(w0, w1, w2)); 7927 } else if (ismpiaij) { 7928 Mat Daij, Oaij; 7929 const PetscInt *garray; 7930 PetscInt max_d_nnz; 7931 7932 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7933 /* 7934 Determine exact preallocation count for diagonal block portion of scalar matrix 7935 */ 7936 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7937 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7938 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7939 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7940 PetscCall(PetscFree3(w0, w1, w2)); 7941 /* 7942 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7943 */ 7944 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7945 o_nnz[jj] = 0; 7946 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7947 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7948 o_nnz[jj] += ncols; 7949 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7950 } 7951 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7952 } 7953 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7954 /* get scalar copy (norms) of matrix */ 7955 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7956 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7957 PetscCall(PetscFree2(d_nnz, o_nnz)); 7958 for (Ii = Istart; Ii < Iend; Ii++) { 7959 PetscInt dest_row = Ii / bs; 7960 7961 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7962 for (jj = 0; jj < ncols; jj++) { 7963 PetscInt dest_col = idx[jj] / bs; 7964 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7965 7966 PetscCall(MatSetValues(Gmat, 1, 
&dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7967 } 7968 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7969 } 7970 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7971 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7972 } 7973 } else { 7974 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7975 else { 7976 Gmat = Amat; 7977 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7978 } 7979 if (isseqaij) { 7980 a = Gmat; 7981 b = NULL; 7982 } else { 7983 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7984 a = d->A; 7985 b = d->B; 7986 } 7987 if (filter >= 0 || scale) { 7988 /* take absolute value of each entry */ 7989 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7990 MatInfo info; 7991 PetscScalar *avals; 7992 7993 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7994 PetscCall(MatSeqAIJGetArray(c, &avals)); 7995 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7996 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7997 } 7998 } 7999 } 8000 if (symmetrize) { 8001 PetscBool isset, issym; 8002 8003 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8004 if (!isset || !issym) { 8005 Mat matTrans; 8006 8007 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8008 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8009 PetscCall(MatDestroy(&matTrans)); 8010 } 8011 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8012 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8013 if (scale) { 8014 /* scale c for all diagonal values = 1 or -1 */ 8015 Vec diag; 8016 8017 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8018 PetscCall(MatGetDiagonal(Gmat, diag)); 8019 PetscCall(VecReciprocal(diag)); 8020 PetscCall(VecSqrtAbs(diag)); 8021 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8022 PetscCall(VecDestroy(&diag)); 8023 } 8024 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8025 if (filter >= 0) { 8026 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8027 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8028 } 8029 *a_Gmat = Gmat; 8030 PetscFunctionReturn(PETSC_SUCCESS); 8031 } 8032 8033 PETSC_INTERN PetscErrorCode MatGetCurrentMemType_MPIAIJ(Mat A, PetscMemType *memtype) 8034 { 8035 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 8036 PetscMemType mD = PETSC_MEMTYPE_HOST, mO = PETSC_MEMTYPE_HOST; 8037 8038 PetscFunctionBegin; 8039 if (mpiaij->A) PetscCall(MatGetCurrentMemType(mpiaij->A, &mD)); 8040 if (mpiaij->B) PetscCall(MatGetCurrentMemType(mpiaij->B, &mO)); 8041 *memtype = (mD == mO) ? mD : PETSC_MEMTYPE_HOST; 8042 PetscFunctionReturn(PETSC_SUCCESS); 8043 } 8044 8045 /* 8046 Special version for direct calls from Fortran 8047 */ 8048 8049 /* Change these macros so can be used in void function */ 8050 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8051 #undef PetscCall 8052 #define PetscCall(...) \ 8053 do { \ 8054 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8055 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8056 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8057 return; \ 8058 } \ 8059 } while (0) 8060 8061 #undef SETERRQ 8062 #define SETERRQ(comm, ierr, ...) 
\ 8063 do { \ 8064 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8065 return; \ 8066 } while (0) 8067 8068 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8069 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8070 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8071 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8072 #else 8073 #endif 8074 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8075 { 8076 Mat mat = *mmat; 8077 PetscInt m = *mm, n = *mn; 8078 InsertMode addv = *maddv; 8079 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8080 PetscScalar value; 8081 8082 MatCheckPreallocated(mat, 1); 8083 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8084 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8085 { 8086 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8087 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8088 PetscBool roworiented = aij->roworiented; 8089 8090 /* Some Variables required in the macro */ 8091 Mat A = aij->A; 8092 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8093 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8094 MatScalar *aa; 8095 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8096 Mat B = aij->B; 8097 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8098 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8099 MatScalar *ba; 8100 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8101 * cannot use "#if defined" inside a macro. 
*/ 8102 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8103 8104 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8105 PetscInt nonew = a->nonew; 8106 MatScalar *ap1, *ap2; 8107 8108 PetscFunctionBegin; 8109 PetscCall(MatSeqAIJGetArray(A, &aa)); 8110 PetscCall(MatSeqAIJGetArray(B, &ba)); 8111 for (i = 0; i < m; i++) { 8112 if (im[i] < 0) continue; 8113 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8114 if (im[i] >= rstart && im[i] < rend) { 8115 row = im[i] - rstart; 8116 lastcol1 = -1; 8117 rp1 = aj + ai[row]; 8118 ap1 = aa + ai[row]; 8119 rmax1 = aimax[row]; 8120 nrow1 = ailen[row]; 8121 low1 = 0; 8122 high1 = nrow1; 8123 lastcol2 = -1; 8124 rp2 = bj + bi[row]; 8125 ap2 = ba + bi[row]; 8126 rmax2 = bimax[row]; 8127 nrow2 = bilen[row]; 8128 low2 = 0; 8129 high2 = nrow2; 8130 8131 for (j = 0; j < n; j++) { 8132 if (roworiented) value = v[i * n + j]; 8133 else value = v[i + j * m]; 8134 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8135 if (in[j] >= cstart && in[j] < cend) { 8136 col = in[j] - cstart; 8137 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8138 } else if (in[j] < 0) continue; 8139 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8140 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8141 } else { 8142 if (mat->was_assembled) { 8143 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8144 #if defined(PETSC_USE_CTABLE) 8145 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8146 col--; 8147 #else 8148 col = aij->colmap[in[j]] - 1; 8149 #endif 8150 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8151 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8152 col = in[j]; 8153 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8154 B = aij->B; 8155 b = (Mat_SeqAIJ *)B->data; 8156 bimax = b->imax; 8157 bi = b->i; 8158 bilen = b->ilen; 8159 bj = b->j; 8160 rp2 = bj + bi[row]; 8161 ap2 = ba + bi[row]; 8162 rmax2 = bimax[row]; 8163 nrow2 = bilen[row]; 8164 low2 = 0; 8165 high2 = nrow2; 8166 bm = aij->B->rmap->n; 8167 ba = b->a; 8168 inserted = PETSC_FALSE; 8169 } 8170 } else col = in[j]; 8171 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8172 } 8173 } 8174 } else if (!aij->donotstash) { 8175 if (roworiented) { 8176 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8177 } else { 8178 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8179 } 8180 } 8181 } 8182 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8183 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8184 } 8185 PetscFunctionReturnVoid(); 8186 } 8187 8188 /* Undefining these here since they were redefined from their original definition above! No 8189 * other PETSc functions should be defined past this point, as it is impossible to recover the 8190 * original definitions */ 8191 #undef PetscCall 8192 #undef SETERRQ 8193