#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

static PetscErrorCode MatReset_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  /* Save the nonzero states of the component matrices because those are what are used to determine
     the nonzero state of mat */
  PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;

  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));
  PetscCall(MatSetUp_MPI_Hash(mat));
  aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
"MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 139 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 140 for communicators controlling multiple processes. 
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
  PetscInt i, m, n, *garray = aij->garray;
  Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  IS sis, gis;
  const PetscInt *isis, *igis;
  PetscInt n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
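/*
  A lookup sketch for the map built above, mirroring what MatSetValues_MPIAIJ() and
  MatGetValues_MPIAIJ() do below; `gcol` and `lcol` are illustrative names only.
  The stored values are shifted by one so that 0 can mean "global column not present":

    #if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
      lcol--;
    #else
      lcol = aij->colmap[gcol] - 1;
    #endif
    // lcol < 0 means the global column gcol has no entry in the off-diagonal block B
*/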
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool ignorezeroentries = a->ignorezeroentries;
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar *aa, *ba;
  PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt nonew;
  MatScalar *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 lastcol1 = -1; 559 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 560 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 561 rmax1 = aimax[row]; 562 nrow1 = ailen[row]; 563 low1 = 0; 564 high1 = nrow1; 565 lastcol2 = -1; 566 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 567 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 568 rmax2 = bimax[row]; 569 nrow2 = bilen[row]; 570 low2 = 0; 571 high2 = nrow2; 572 573 for (j = 0; j < n; j++) { 574 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 576 if (in[j] >= cstart && in[j] < cend) { 577 col = in[j] - cstart; 578 nonew = a->nonew; 579 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 580 } else if (in[j] < 0) { 581 continue; 582 } else { 583 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 584 if (mat->was_assembled) { 585 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 586 #if defined(PETSC_USE_CTABLE) 587 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 593 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */ 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ *)B->data; 598 bimax = b->imax; 599 bi = b->i; 600 bilen = b->ilen; 601 bj = b->j; 602 ba = b->a; 603 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 604 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 605 rmax2 = bimax[row]; 606 nrow2 = bilen[row]; 607 low2 = 0; 608 high2 = nrow2; 609 bm = aij->B->rmap->n; 610 ba = b->a; 611 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 612 PetscCheck(1 == ((Mat_SeqAIJ *)aij->B->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 613 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 618 } 619 } 620 } else { 621 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 626 } else { 627 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 628 } 629 } 630 } 631 } 632 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. 
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt diag_so_far = 0, dnz;
  PetscInt offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
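/*
  A small worked example of the CSR input expected by the two CopyFromCSRFormat routines
  (illustrative numbers only). Suppose this process owns rows 0-1 and columns 0-1
  (cstart = 0, cend = 2) of a 2 x 4 matrix whose local rows hold {a00, a03} and {a11, a12}:

    mat_i = {0, 2, 4}
    mat_j = {0, 3, 1, 2}          column indices sorted within each row
    mat_a = {a00, a03, a11, a12}

  Columns 0 and 1 fall in [cstart, cend) and go to the diagonal block (with cstart
  subtracted); columns 2 and 3 go to the off-diagonal block with their global indices.
*/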
/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
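/*
  Summary of the off-process assembly flow implemented by the routine above and the one below
  (kept here only as an orientation aid): MatAssemblyBegin_MPIAIJ() starts communicating the
  values that MatSetValues_MPIAIJ() stashed for rows owned by other processes; MatAssemblyEnd_MPIAIJ()
  receives them, inserts them with MatSetValues_MPIAIJ(), and then assembles the diagonal (A) and
  off-diagonal (B) blocks, disassembling/reassembling B when its nonzero pattern may have changed.
*/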
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt n;
  PetscInt i, j, rstart, ncols, flg;
  PetscInt *row, *col;
  PetscBool all_assembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt *lrows;
  PetscInt r, len;
  PetscBool cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt nnwA, nnwB;
    PetscBool nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
  PetscInt n = A->rmap->n;
  PetscInt i, j, r, m, len = 0;
  PetscInt *lrows, *owners = A->rmap->range;
  PetscMPIInt p = 0;
  PetscSFNode *rrows;
  PetscSF sf;
  const PetscScalar *xx;
  PetscScalar *bb, *mask, *aij_a;
  Vec xmask, lmask;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt *aj, *ii, *ridx;
  PetscScalar *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m = aij->compressedrow.nrows;
    ii = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS Me, Notme;
  PetscInt M, N, first, last, *notme, i;
  PetscBool lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}
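/*
  On-disk layout written by MatView_MPIAIJ_Binary() below, as can be read off its code;
  kept here only as an orientation aid:

    header[4]  = {MAT_FILE_CLASSID, M, N, total number of nonzeros}
    rowlens[M] = number of nonzeros in each global row
    colidxs[]  = global column indices, row by row, in increasing column order
    matvals[]  = the matching numerical values, in the same order
*/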
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64 nz, hnz;
  PetscInt *rowlens;
  PetscInt *colidxs;
  PetscScalar *matvals;
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M = mat->rmap->N;
  N = mat->cmap->N;
  m = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt rank = aij->rank, size = aij->size;
  PetscBool isdraw, isascii, isbinary;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
PetscInt_FMT "\n", nmin, navg, nmax)); 1287 PetscFunctionReturn(PETSC_SUCCESS); 1288 } 1289 PetscCall(PetscViewerGetFormat(viewer, &format)); 1290 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1291 MatInfo info; 1292 PetscInt *inodes = NULL; 1293 1294 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1295 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1296 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1297 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1298 if (!inodes) { 1299 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1300 info.memory)); 1301 } else { 1302 PetscCall( 1303 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1304 } 1305 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1306 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1307 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1308 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1309 PetscCall(PetscViewerFlush(viewer)); 1310 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1311 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1312 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1313 PetscFunctionReturn(PETSC_SUCCESS); 1314 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1315 PetscInt inodecount, inodelimit, *inodes; 1316 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1317 if (inodes) { 1318 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1319 } else { 1320 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1321 } 1322 PetscFunctionReturn(PETSC_SUCCESS); 1323 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1324 PetscFunctionReturn(PETSC_SUCCESS); 1325 } 1326 } else if (isbinary) { 1327 if (size == 1) { 1328 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1329 PetscCall(MatView(aij->A, viewer)); 1330 } else { 1331 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1332 } 1333 PetscFunctionReturn(PETSC_SUCCESS); 1334 } else if (isascii && size == 1) { 1335 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1336 PetscCall(MatView(aij->A, viewer)); 1337 PetscFunctionReturn(PETSC_SUCCESS); 1338 } else if (isdraw) { 1339 PetscDraw draw; 1340 PetscBool isnull; 1341 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1342 PetscCall(PetscDrawIsNull(draw, &isnull)); 1343 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1344 } 1345 1346 { /* assemble the entire matrix onto first processor */ 1347 Mat A = NULL, Av; 1348 IS isrow, iscol; 1349 1350 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1352 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1353 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1354 /* The commented code uses MatCreateSubMatrices instead */ 1355 /* 1356 Mat *AA, A = NULL, Av; 1357 IS isrow,iscol; 1358 1359 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1361 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1362 if (rank == 0) { 1363 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1364 A = AA[0]; 1365 Av = AA[0]; 1366 } 1367 PetscCall(MatDestroySubMatrices(1,&AA)); 1368 */ 1369 PetscCall(ISDestroy(&iscol)); 1370 PetscCall(ISDestroy(&isrow)); 1371 /* 1372 Everyone has to call to draw the matrix since the graphics waits are 1373 synchronized across all processors that share the PetscDraw object 1374 */ 1375 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1376 if (rank == 0) { 1377 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1378 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1379 } 1380 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1381 PetscCall(MatDestroy(&A)); 1382 } 1383 PetscFunctionReturn(PETSC_SUCCESS); 1384 } 1385 1386 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1387 { 1388 PetscBool isascii, isdraw, issocket, isbinary; 1389 1390 PetscFunctionBegin; 1391 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1395 if (isascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1396 PetscFunctionReturn(PETSC_SUCCESS); 1397 } 1398 1399 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1400 { 1401 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1402 Vec bb1 = NULL; 1403 PetscBool hasop; 1404 1405 PetscFunctionBegin; 1406 if (flag == SOR_APPLY_UPPER) { 1407 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1408 PetscFunctionReturn(PETSC_SUCCESS); 1409 } 1410 1411 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1412 1413 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1414 if (flag & SOR_ZERO_INITIAL_GUESS) { 1415 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1416 its--; 1417 } 1418 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 
1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 1439 /* update rhs: bb1 = bb - B*x */ 1440 PetscCall(VecScale(mat->lvec, -1.0)); 1441 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1442 1443 /* local sweep */ 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1445 } 1446 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1447 if (flag & SOR_ZERO_INITIAL_GUESS) { 1448 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1449 its--; 1450 } 1451 while (its--) { 1452 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 1455 /* update rhs: bb1 = bb - B*x */ 1456 PetscCall(VecScale(mat->lvec, -1.0)); 1457 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1458 1459 /* local sweep */ 1460 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1461 } 1462 } else if (flag & SOR_EISENSTAT) { 1463 Vec xx1; 1464 1465 PetscCall(VecDuplicate(bb, &xx1)); 1466 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1467 1468 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1469 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1470 if (!mat->diag) { 1471 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1472 PetscCall(MatGetDiagonal(matin, mat->diag)); 1473 } 1474 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1475 if (hasop) { 1476 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1477 } else { 1478 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1479 } 1480 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1481 1482 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1483 1484 /* local sweep */ 1485 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1486 PetscCall(VecAXPY(xx, 1.0, xx1)); 1487 PetscCall(VecDestroy(&xx1)); 1488 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1489 1490 PetscCall(VecDestroy(&bb1)); 1491 1492 matin->factorerrortype = mat->A->factorerrortype; 1493 PetscFunctionReturn(PETSC_SUCCESS); 1494 } 1495 1496 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1497 { 1498 Mat aA, aB, Aperm; 1499 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1500 PetscScalar *aa, *ba; 1501 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1502 PetscSF rowsf, sf; 1503 IS parcolp = NULL; 1504 PetscBool done; 1505 1506 PetscFunctionBegin; 1507 PetscCall(MatGetLocalSize(A, &m, &n)); 1508 PetscCall(ISGetIndices(rowp, &rwant)); 1509 PetscCall(ISGetIndices(colp, &cwant)); 1510 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1511 1512 /* Invert row permutation to find out where my rows should go */ 1513 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1514 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1515 PetscCall(PetscSFSetFromOptions(rowsf)); 1516 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1517 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1518 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1519 1520 /* Invert column permutation to find out where my columns should go */ 1521 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1522 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1523 PetscCall(PetscSFSetFromOptions(sf)); 1524 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1525 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1526 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1527 PetscCall(PetscSFDestroy(&sf)); 1528 1529 PetscCall(ISRestoreIndices(rowp, &rwant)); 1530 PetscCall(ISRestoreIndices(colp, &cwant)); 1531 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1532 1533 /* Find out where my gcols should go */ 1534 PetscCall(MatGetSize(aB, NULL, &ng)); 1535 PetscCall(PetscMalloc1(ng, &gcdest)); 1536 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1537 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1538 PetscCall(PetscSFSetFromOptions(sf)); 1539 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1540 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1541 PetscCall(PetscSFDestroy(&sf)); 1542 1543 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1544 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1545 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1546 for (i = 0; i < m; i++) { 1547 PetscInt row = rdest[i]; 1548 PetscMPIInt rowner; 1549 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1550 for (j = ai[i]; j < ai[i + 1]; j++) { 1551 PetscInt col = cdest[aj[j]]; 1552 PetscMPIInt cowner; 1553 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1554 if (rowner == cowner) dnnz[i]++; 1555 else onnz[i]++; 1556 } 1557 for (j = bi[i]; j < bi[i + 1]; j++) { 1558 PetscInt col = gcdest[bj[j]]; 1559 PetscMPIInt cowner; 1560 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1561 if (rowner == cowner) dnnz[i]++; 1562 else onnz[i]++; 1563 } 1564 } 1565 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1566 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1568 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1569 PetscCall(PetscSFDestroy(&rowsf)); 1570 1571 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1572 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1573 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1574 for (i = 0; i < m; i++) { 1575 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1576 PetscInt j0, rowlen; 1577 rowlen = ai[i + 1] - ai[i]; 1578 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1579 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1580 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1581 } 1582 rowlen = bi[i + 1] - bi[i]; 1583 for (j0 = j = 0; j < rowlen; j0 = j) { 1584 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1585 
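      /* insert this batch of off-diagonal (B part) entries at their permuted row and global column locations; bcols reuses the now-unneeded onnz array, which is why batches are limited to at most m entries */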
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1586 } 1587 } 1588 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1589 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1591 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1592 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1593 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1594 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1595 PetscCall(PetscFree3(work, rdest, cdest)); 1596 PetscCall(PetscFree(gcdest)); 1597 if (parcolp) PetscCall(ISDestroy(&colp)); 1598 *B = Aperm; 1599 PetscFunctionReturn(PETSC_SUCCESS); 1600 } 1601 1602 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1603 { 1604 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1605 1606 PetscFunctionBegin; 1607 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1608 if (ghosts) *ghosts = aij->garray; 1609 PetscFunctionReturn(PETSC_SUCCESS); 1610 } 1611 1612 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1613 { 1614 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1615 Mat A = mat->A, B = mat->B; 1616 PetscLogDouble isend[5], irecv[5]; 1617 1618 PetscFunctionBegin; 1619 info->block_size = 1.0; 1620 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1621 1622 isend[0] = info->nz_used; 1623 isend[1] = info->nz_allocated; 1624 isend[2] = info->nz_unneeded; 1625 isend[3] = info->memory; 1626 isend[4] = info->mallocs; 1627 1628 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1629 1630 isend[0] += info->nz_used; 1631 isend[1] += info->nz_allocated; 1632 isend[2] += info->nz_unneeded; 1633 isend[3] += info->memory; 1634 isend[4] += info->mallocs; 1635 if (flag == MAT_LOCAL) { 1636 info->nz_used = isend[0]; 1637 info->nz_allocated = isend[1]; 1638 info->nz_unneeded = isend[2]; 1639 info->memory = isend[3]; 1640 info->mallocs = isend[4]; 1641 } else if (flag == MAT_GLOBAL_MAX) { 1642 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1643 1644 info->nz_used = irecv[0]; 1645 info->nz_allocated = irecv[1]; 1646 info->nz_unneeded = irecv[2]; 1647 info->memory = irecv[3]; 1648 info->mallocs = irecv[4]; 1649 } else if (flag == MAT_GLOBAL_SUM) { 1650 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1651 1652 info->nz_used = irecv[0]; 1653 info->nz_allocated = irecv[1]; 1654 info->nz_unneeded = irecv[2]; 1655 info->memory = irecv[3]; 1656 info->mallocs = irecv[4]; 1657 } 1658 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1659 info->fill_ratio_needed = 0; 1660 info->factor_mallocs = 0; 1661 PetscFunctionReturn(PETSC_SUCCESS); 1662 } 1663 1664 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1665 { 1666 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1667 1668 PetscFunctionBegin; 1669 switch (op) { 1670 case MAT_NEW_NONZERO_LOCATIONS: 1671 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1672 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1673 case MAT_KEEP_NONZERO_PATTERN: 1674 case MAT_NEW_NONZERO_LOCATION_ERR: 1675 case MAT_USE_INODES: 1676 case MAT_IGNORE_ZERO_ENTRIES: 1677 case MAT_FORM_EXPLICIT_TRANSPOSE: 1678 MatCheckPreallocated(A, 1); 1679 PetscCall(MatSetOption(a->A, op, flg)); 1680 PetscCall(MatSetOption(a->B, op, flg)); 1681 break; 1682 case MAT_ROW_ORIENTED: 1683 MatCheckPreallocated(A, 1); 1684 a->roworiented = flg; 1685 1686 
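    /* keep the local diagonal (A) and off-diagonal (B) blocks consistent with the row/column orientation chosen for the parallel matrix */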
PetscCall(MatSetOption(a->A, op, flg)); 1687 PetscCall(MatSetOption(a->B, op, flg)); 1688 break; 1689 case MAT_IGNORE_OFF_PROC_ENTRIES: 1690 a->donotstash = flg; 1691 break; 1692 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1693 case MAT_SPD: 1694 case MAT_SYMMETRIC: 1695 case MAT_STRUCTURALLY_SYMMETRIC: 1696 case MAT_HERMITIAN: 1697 case MAT_SYMMETRY_ETERNAL: 1698 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1699 case MAT_SPD_ETERNAL: 1700 /* if the diagonal matrix is square it inherits some of the properties above */ 1701 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1702 break; 1703 case MAT_SUBMAT_SINGLEIS: 1704 A->submat_singleis = flg; 1705 break; 1706 default: 1707 break; 1708 } 1709 PetscFunctionReturn(PETSC_SUCCESS); 1710 } 1711 1712 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1713 { 1714 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1715 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1716 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1717 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1718 PetscInt *cmap, *idx_p; 1719 1720 PetscFunctionBegin; 1721 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1722 mat->getrowactive = PETSC_TRUE; 1723 1724 if (!mat->rowvalues && (idx || v)) { 1725 /* 1726 allocate enough space to hold information from the longest row. 1727 */ 1728 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1729 PetscInt max = 1, tmp; 1730 for (i = 0; i < matin->rmap->n; i++) { 1731 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1732 if (max < tmp) max = tmp; 1733 } 1734 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1735 } 1736 1737 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1738 lrow = row - rstart; 1739 1740 pvA = &vworkA; 1741 pcA = &cworkA; 1742 pvB = &vworkB; 1743 pcB = &cworkB; 1744 if (!v) { 1745 pvA = NULL; 1746 pvB = NULL; 1747 } 1748 if (!idx) { 1749 pcA = NULL; 1750 if (!v) pcB = NULL; 1751 } 1752 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1753 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1754 nztot = nzA + nzB; 1755 1756 cmap = mat->garray; 1757 if (v || idx) { 1758 if (nztot) { 1759 /* Sort by increasing column numbers, assuming A and B already sorted */ 1760 PetscInt imark = -1; 1761 if (v) { 1762 *v = v_p = mat->rowvalues; 1763 for (i = 0; i < nzB; i++) { 1764 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1765 else break; 1766 } 1767 imark = i; 1768 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1769 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1770 } 1771 if (idx) { 1772 *idx = idx_p = mat->rowindices; 1773 if (imark > -1) { 1774 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1775 } else { 1776 for (i = 0; i < nzB; i++) { 1777 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1778 else break; 1779 } 1780 imark = i; 1781 } 1782 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1783 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1784 } 1785 } else { 1786 if (idx) *idx = NULL; 1787 if (v) *v = NULL; 1788 } 1789 } 1790 *nz = nztot; 1791 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1792 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1793 
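  /* the merged row (when idx or v was requested) was copied into mat->rowindices and mat->rowvalues above, so the per-block rows from A and B can be returned before handing the result back to the caller */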
PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 1800 PetscFunctionBegin; 1801 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1802 aij->getrowactive = PETSC_FALSE; 1803 PetscFunctionReturn(PETSC_SUCCESS); 1804 } 1805 1806 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1807 { 1808 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1809 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1810 PetscInt i, j, cstart = mat->cmap->rstart; 1811 PetscReal sum = 0.0; 1812 const MatScalar *v, *amata, *bmata; 1813 1814 PetscFunctionBegin; 1815 if (aij->size == 1) { 1816 PetscCall(MatNorm(aij->A, type, norm)); 1817 } else { 1818 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1819 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1820 if (type == NORM_FROBENIUS) { 1821 v = amata; 1822 for (i = 0; i < amat->nz; i++) { 1823 sum += PetscRealPart(PetscConj(*v) * (*v)); 1824 v++; 1825 } 1826 v = bmata; 1827 for (i = 0; i < bmat->nz; i++) { 1828 sum += PetscRealPart(PetscConj(*v) * (*v)); 1829 v++; 1830 } 1831 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1832 *norm = PetscSqrtReal(*norm); 1833 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1834 } else if (type == NORM_1) { /* max column norm */ 1835 PetscReal *tmp; 1836 PetscInt *jj, *garray = aij->garray; 1837 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1838 *norm = 0.0; 1839 v = amata; 1840 jj = amat->j; 1841 for (j = 0; j < amat->nz; j++) { 1842 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1843 v++; 1844 } 1845 v = bmata; 1846 jj = bmat->j; 1847 for (j = 0; j < bmat->nz; j++) { 1848 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1849 v++; 1850 } 1851 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1852 for (j = 0; j < mat->cmap->N; j++) { 1853 if (tmp[j] > *norm) *norm = tmp[j]; 1854 } 1855 PetscCall(PetscFree(tmp)); 1856 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1857 } else if (type == NORM_INFINITY) { /* max row norm */ 1858 PetscReal ntemp = 0.0; 1859 for (j = 0; j < aij->A->rmap->n; j++) { 1860 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1861 sum = 0.0; 1862 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1863 sum += PetscAbsScalar(*v); 1864 v++; 1865 } 1866 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1867 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1868 sum += PetscAbsScalar(*v); 1869 v++; 1870 } 1871 if (sum > ntemp) ntemp = sum; 1872 } 1873 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1874 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1875 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1876 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1877 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1878 } 1879 PetscFunctionReturn(PETSC_SUCCESS); 1880 } 1881 1882 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1883 { 1884 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1885 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1886 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, 
*cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1887 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1888 Mat B, A_diag, *B_diag; 1889 const MatScalar *pbv, *bv; 1890 1891 PetscFunctionBegin; 1892 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1893 ma = A->rmap->n; 1894 na = A->cmap->n; 1895 mb = a->B->rmap->n; 1896 nb = a->B->cmap->n; 1897 ai = Aloc->i; 1898 aj = Aloc->j; 1899 bi = Bloc->i; 1900 bj = Bloc->j; 1901 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1902 PetscInt *d_nnz, *g_nnz, *o_nnz; 1903 PetscSFNode *oloc; 1904 PETSC_UNUSED PetscSF sf; 1905 1906 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1907 /* compute d_nnz for preallocation */ 1908 PetscCall(PetscArrayzero(d_nnz, na)); 1909 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1910 /* compute local off-diagonal contributions */ 1911 PetscCall(PetscArrayzero(g_nnz, nb)); 1912 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1913 /* map those to global */ 1914 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1915 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1916 PetscCall(PetscSFSetFromOptions(sf)); 1917 PetscCall(PetscArrayzero(o_nnz, na)); 1918 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1919 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1920 PetscCall(PetscSFDestroy(&sf)); 1921 1922 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1923 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1924 PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs)); 1925 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1926 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1927 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1928 } else { 1929 B = *matout; 1930 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1931 } 1932 1933 b = (Mat_MPIAIJ *)B->data; 1934 A_diag = a->A; 1935 B_diag = &b->A; 1936 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1937 A_diag_ncol = A_diag->cmap->N; 1938 B_diag_ilen = sub_B_diag->ilen; 1939 B_diag_i = sub_B_diag->i; 1940 1941 /* Set ilen for diagonal of B */ 1942 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1943 1944 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1945 very quickly (=without using MatSetValues), because all writes are local. 
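     The off-diagonal (B) part below is copied with MatSetValues() instead, since its transposed entries generally
     belong to rows owned by other processes and are communicated during assembly.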
*/ 1946 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1947 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1948 1949 /* copy over the B part */ 1950 PetscCall(PetscMalloc1(bi[mb], &cols)); 1951 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1952 pbv = bv; 1953 row = A->rmap->rstart; 1954 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1955 cols_tmp = cols; 1956 for (i = 0; i < mb; i++) { 1957 ncol = bi[i + 1] - bi[i]; 1958 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1959 row++; 1960 if (pbv) pbv += ncol; 1961 if (cols_tmp) cols_tmp += ncol; 1962 } 1963 PetscCall(PetscFree(cols)); 1964 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1965 1966 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1967 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1968 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1969 *matout = B; 1970 } else { 1971 PetscCall(MatHeaderMerge(A, &B)); 1972 } 1973 PetscFunctionReturn(PETSC_SUCCESS); 1974 } 1975 1976 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1977 { 1978 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1979 Mat a = aij->A, b = aij->B; 1980 PetscInt s1, s2, s3; 1981 1982 PetscFunctionBegin; 1983 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1984 if (rr) { 1985 PetscCall(VecGetLocalSize(rr, &s1)); 1986 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1987 /* Overlap communication with computation. */ 1988 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1989 } 1990 if (ll) { 1991 PetscCall(VecGetLocalSize(ll, &s1)); 1992 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1993 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1994 } 1995 /* scale the diagonal block */ 1996 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1997 1998 if (rr) { 1999 /* Do a scatter end and then right scale the off-diagonal block */ 2000 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2001 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2002 } 2003 PetscFunctionReturn(PETSC_SUCCESS); 2004 } 2005 2006 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2007 { 2008 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2009 2010 PetscFunctionBegin; 2011 PetscCall(MatSetUnfactored(a->A)); 2012 PetscFunctionReturn(PETSC_SUCCESS); 2013 } 2014 2015 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2016 { 2017 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2018 Mat a, b, c, d; 2019 PetscBool flg; 2020 2021 PetscFunctionBegin; 2022 a = matA->A; 2023 b = matA->B; 2024 c = matB->A; 2025 d = matB->B; 2026 2027 PetscCall(MatEqual(a, c, &flg)); 2028 if (flg) PetscCall(MatEqual(b, d, &flg)); 2029 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2030 PetscFunctionReturn(PETSC_SUCCESS); 2031 } 2032 2033 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2034 { 2035 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2036 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2037 2038 PetscFunctionBegin; 2039 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
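     When fast copy is not possible we fall back on MatCopy_Basic(), which copies row by row through the generic
     MatGetRow()/MatSetValues() interface and therefore works for any two matrices of the same dimensions, only more slowly.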
*/ 2040 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2041 /* because of the column compression in the off-processor part of the matrix a->B, 2042 the number of columns in a->B and b->B may be different, hence we cannot call 2043 the MatCopy() directly on the two parts. If need be, we can provide a more 2044 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2045 then copying the submatrices */ 2046 PetscCall(MatCopy_Basic(A, B, str)); 2047 } else { 2048 PetscCall(MatCopy(a->A, b->A, str)); 2049 PetscCall(MatCopy(a->B, b->B, str)); 2050 } 2051 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2052 PetscFunctionReturn(PETSC_SUCCESS); 2053 } 2054 2055 /* 2056 Computes the number of nonzeros per row needed for preallocation when X and Y 2057 have different nonzero structure. 2058 */ 2059 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2060 { 2061 PetscInt i, j, k, nzx, nzy; 2062 2063 PetscFunctionBegin; 2064 /* Set the number of nonzeros in the new matrix */ 2065 for (i = 0; i < m; i++) { 2066 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2067 nzx = xi[i + 1] - xi[i]; 2068 nzy = yi[i + 1] - yi[i]; 2069 nnz[i] = 0; 2070 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2071 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2072 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2073 nnz[i]++; 2074 } 2075 for (; k < nzy; k++) nnz[i]++; 2076 } 2077 PetscFunctionReturn(PETSC_SUCCESS); 2078 } 2079 2080 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2081 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2082 { 2083 PetscInt m = Y->rmap->N; 2084 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2085 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2086 2087 PetscFunctionBegin; 2088 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2089 PetscFunctionReturn(PETSC_SUCCESS); 2090 } 2091 2092 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2093 { 2094 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2095 2096 PetscFunctionBegin; 2097 if (str == SAME_NONZERO_PATTERN) { 2098 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2099 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2100 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2101 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2102 } else { 2103 Mat B; 2104 PetscInt *nnz_d, *nnz_o; 2105 2106 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2107 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2108 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2109 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2110 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2111 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2112 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2113 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2114 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2115 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2116 PetscCall(MatHeaderMerge(Y, &B)); 2117 PetscCall(PetscFree(nnz_d)); 
2118 PetscCall(PetscFree(nnz_o)); 2119 } 2120 PetscFunctionReturn(PETSC_SUCCESS); 2121 } 2122 2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2124 2125 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2126 { 2127 PetscFunctionBegin; 2128 if (PetscDefined(USE_COMPLEX)) { 2129 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2130 2131 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2132 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2133 } 2134 PetscFunctionReturn(PETSC_SUCCESS); 2135 } 2136 2137 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2138 { 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2140 2141 PetscFunctionBegin; 2142 PetscCall(MatRealPart(a->A)); 2143 PetscCall(MatRealPart(a->B)); 2144 PetscFunctionReturn(PETSC_SUCCESS); 2145 } 2146 2147 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2148 { 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2150 2151 PetscFunctionBegin; 2152 PetscCall(MatImaginaryPart(a->A)); 2153 PetscCall(MatImaginaryPart(a->B)); 2154 PetscFunctionReturn(PETSC_SUCCESS); 2155 } 2156 2157 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2158 { 2159 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2160 PetscInt i, *idxb = NULL, m = A->rmap->n; 2161 PetscScalar *vv; 2162 Vec vB, vA; 2163 const PetscScalar *va, *vb; 2164 2165 PetscFunctionBegin; 2166 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2167 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2168 2169 PetscCall(VecGetArrayRead(vA, &va)); 2170 if (idx) { 2171 for (i = 0; i < m; i++) { 2172 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2173 } 2174 } 2175 2176 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2177 PetscCall(PetscMalloc1(m, &idxb)); 2178 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2179 2180 PetscCall(VecGetArrayWrite(v, &vv)); 2181 PetscCall(VecGetArrayRead(vB, &vb)); 2182 for (i = 0; i < m; i++) { 2183 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2184 vv[i] = vb[i]; 2185 if (idx) idx[i] = a->garray[idxb[i]]; 2186 } else { 2187 vv[i] = va[i]; 2188 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2189 } 2190 } 2191 PetscCall(VecRestoreArrayWrite(v, &vv)); 2192 PetscCall(VecRestoreArrayRead(vA, &va)); 2193 PetscCall(VecRestoreArrayRead(vB, &vb)); 2194 PetscCall(PetscFree(idxb)); 2195 PetscCall(VecDestroy(&vA)); 2196 PetscCall(VecDestroy(&vB)); 2197 PetscFunctionReturn(PETSC_SUCCESS); 2198 } 2199 2200 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2201 { 2202 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2203 Vec vB, vA; 2204 2205 PetscFunctionBegin; 2206 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2207 PetscCall(MatGetRowSumAbs(a->A, vA)); 2208 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2209 PetscCall(MatGetRowSumAbs(a->B, vB)); 2210 PetscCall(VecAXPY(vA, 1.0, vB)); 2211 PetscCall(VecDestroy(&vB)); 2212 PetscCall(VecCopy(vA, v)); 2213 PetscCall(VecDestroy(&vA)); 2214 PetscFunctionReturn(PETSC_SUCCESS); 2215 } 2216 2217 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2218 { 2219 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2220 PetscInt m = A->rmap->n, n = A->cmap->n; 2221 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2222 PetscInt *cmap = mat->garray; 2223 PetscInt *diagIdx, *offdiagIdx; 2224 Vec diagV, offdiagV; 2225 PetscScalar *a, *diagA, *offdiagA; 2226 const PetscScalar *ba, *bav; 2227 PetscInt r, j, col, ncols, *bi, *bj; 2228 Mat B = mat->B; 2229 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2230 2231 PetscFunctionBegin; 2232 /* When a process holds entire A and other 
processes have no entry */ 2233 if (A->cmap->N == n) { 2234 PetscCall(VecGetArrayWrite(v, &diagA)); 2235 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2236 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2237 PetscCall(VecDestroy(&diagV)); 2238 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2239 PetscFunctionReturn(PETSC_SUCCESS); 2240 } else if (n == 0) { 2241 if (m) { 2242 PetscCall(VecGetArrayWrite(v, &a)); 2243 for (r = 0; r < m; r++) { 2244 a[r] = 0.0; 2245 if (idx) idx[r] = -1; 2246 } 2247 PetscCall(VecRestoreArrayWrite(v, &a)); 2248 } 2249 PetscFunctionReturn(PETSC_SUCCESS); 2250 } 2251 2252 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2253 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2255 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2256 2257 /* Get offdiagIdx[] for implicit 0.0 */ 2258 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2259 ba = bav; 2260 bi = b->i; 2261 bj = b->j; 2262 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2263 for (r = 0; r < m; r++) { 2264 ncols = bi[r + 1] - bi[r]; 2265 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2266 offdiagA[r] = *ba; 2267 offdiagIdx[r] = cmap[0]; 2268 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2269 offdiagA[r] = 0.0; 2270 2271 /* Find first hole in the cmap */ 2272 for (j = 0; j < ncols; j++) { 2273 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2274 if (col > j && j < cstart) { 2275 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2276 break; 2277 } else if (col > j + n && j >= cstart) { 2278 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2279 break; 2280 } 2281 } 2282 if (j == ncols && ncols < A->cmap->N - n) { 2283 /* a hole is outside compressed Bcols */ 2284 if (ncols == 0) { 2285 if (cstart) { 2286 offdiagIdx[r] = 0; 2287 } else offdiagIdx[r] = cend; 2288 } else { /* ncols > 0 */ 2289 offdiagIdx[r] = cmap[ncols - 1] + 1; 2290 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2291 } 2292 } 2293 } 2294 2295 for (j = 0; j < ncols; j++) { 2296 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2297 offdiagA[r] = *ba; 2298 offdiagIdx[r] = cmap[*bj]; 2299 } 2300 ba++; 2301 bj++; 2302 } 2303 } 2304 2305 PetscCall(VecGetArrayWrite(v, &a)); 2306 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2307 for (r = 0; r < m; ++r) { 2308 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2309 a[r] = diagA[r]; 2310 if (idx) idx[r] = cstart + diagIdx[r]; 2311 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2312 a[r] = diagA[r]; 2313 if (idx) { 2314 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2315 idx[r] = cstart + diagIdx[r]; 2316 } else idx[r] = offdiagIdx[r]; 2317 } 2318 } else { 2319 a[r] = offdiagA[r]; 2320 if (idx) idx[r] = offdiagIdx[r]; 2321 } 2322 } 2323 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2324 PetscCall(VecRestoreArrayWrite(v, &a)); 2325 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2326 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2327 PetscCall(VecDestroy(&diagV)); 2328 PetscCall(VecDestroy(&offdiagV)); 2329 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2330 PetscFunctionReturn(PETSC_SUCCESS); 2331 } 2332 2333 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2334 { 2335 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2336 PetscInt m = A->rmap->n, n = A->cmap->n; 2337 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 
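  /* mat->garray maps the compressed local column indices of the off-diagonal block B back to global column numbers */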
2338 PetscInt *cmap = mat->garray; 2339 PetscInt *diagIdx, *offdiagIdx; 2340 Vec diagV, offdiagV; 2341 PetscScalar *a, *diagA, *offdiagA; 2342 const PetscScalar *ba, *bav; 2343 PetscInt r, j, col, ncols, *bi, *bj; 2344 Mat B = mat->B; 2345 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2346 2347 PetscFunctionBegin; 2348 /* When a process holds entire A and other processes have no entry */ 2349 if (A->cmap->N == n) { 2350 PetscCall(VecGetArrayWrite(v, &diagA)); 2351 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2352 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2353 PetscCall(VecDestroy(&diagV)); 2354 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2355 PetscFunctionReturn(PETSC_SUCCESS); 2356 } else if (n == 0) { 2357 if (m) { 2358 PetscCall(VecGetArrayWrite(v, &a)); 2359 for (r = 0; r < m; r++) { 2360 a[r] = PETSC_MAX_REAL; 2361 if (idx) idx[r] = -1; 2362 } 2363 PetscCall(VecRestoreArrayWrite(v, &a)); 2364 } 2365 PetscFunctionReturn(PETSC_SUCCESS); 2366 } 2367 2368 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2369 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2370 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2371 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2372 2373 /* Get offdiagIdx[] for implicit 0.0 */ 2374 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2375 ba = bav; 2376 bi = b->i; 2377 bj = b->j; 2378 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2379 for (r = 0; r < m; r++) { 2380 ncols = bi[r + 1] - bi[r]; 2381 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2382 offdiagA[r] = *ba; 2383 offdiagIdx[r] = cmap[0]; 2384 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2385 offdiagA[r] = 0.0; 2386 2387 /* Find first hole in the cmap */ 2388 for (j = 0; j < ncols; j++) { 2389 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2390 if (col > j && j < cstart) { 2391 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2392 break; 2393 } else if (col > j + n && j >= cstart) { 2394 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2395 break; 2396 } 2397 } 2398 if (j == ncols && ncols < A->cmap->N - n) { 2399 /* a hole is outside compressed Bcols */ 2400 if (ncols == 0) { 2401 if (cstart) { 2402 offdiagIdx[r] = 0; 2403 } else offdiagIdx[r] = cend; 2404 } else { /* ncols > 0 */ 2405 offdiagIdx[r] = cmap[ncols - 1] + 1; 2406 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2407 } 2408 } 2409 } 2410 2411 for (j = 0; j < ncols; j++) { 2412 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2413 offdiagA[r] = *ba; 2414 offdiagIdx[r] = cmap[*bj]; 2415 } 2416 ba++; 2417 bj++; 2418 } 2419 } 2420 2421 PetscCall(VecGetArrayWrite(v, &a)); 2422 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2423 for (r = 0; r < m; ++r) { 2424 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2425 a[r] = diagA[r]; 2426 if (idx) idx[r] = cstart + diagIdx[r]; 2427 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2428 a[r] = diagA[r]; 2429 if (idx) { 2430 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2431 idx[r] = cstart + diagIdx[r]; 2432 } else idx[r] = offdiagIdx[r]; 2433 } 2434 } else { 2435 a[r] = offdiagA[r]; 2436 if (idx) idx[r] = offdiagIdx[r]; 2437 } 2438 } 2439 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2440 PetscCall(VecRestoreArrayWrite(v, &a)); 2441 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2442 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2443 PetscCall(VecDestroy(&diagV)); 2444 
PetscCall(VecDestroy(&offdiagV)); 2445 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2446 PetscFunctionReturn(PETSC_SUCCESS); 2447 } 2448 2449 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2450 { 2451 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2452 PetscInt m = A->rmap->n, n = A->cmap->n; 2453 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2454 PetscInt *cmap = mat->garray; 2455 PetscInt *diagIdx, *offdiagIdx; 2456 Vec diagV, offdiagV; 2457 PetscScalar *a, *diagA, *offdiagA; 2458 const PetscScalar *ba, *bav; 2459 PetscInt r, j, col, ncols, *bi, *bj; 2460 Mat B = mat->B; 2461 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2462 2463 PetscFunctionBegin; 2464 /* When a process holds entire A and other processes have no entry */ 2465 if (A->cmap->N == n) { 2466 PetscCall(VecGetArrayWrite(v, &diagA)); 2467 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2468 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2469 PetscCall(VecDestroy(&diagV)); 2470 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2471 PetscFunctionReturn(PETSC_SUCCESS); 2472 } else if (n == 0) { 2473 if (m) { 2474 PetscCall(VecGetArrayWrite(v, &a)); 2475 for (r = 0; r < m; r++) { 2476 a[r] = PETSC_MIN_REAL; 2477 if (idx) idx[r] = -1; 2478 } 2479 PetscCall(VecRestoreArrayWrite(v, &a)); 2480 } 2481 PetscFunctionReturn(PETSC_SUCCESS); 2482 } 2483 2484 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2485 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2486 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2487 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2488 2489 /* Get offdiagIdx[] for implicit 0.0 */ 2490 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2491 ba = bav; 2492 bi = b->i; 2493 bj = b->j; 2494 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2495 for (r = 0; r < m; r++) { 2496 ncols = bi[r + 1] - bi[r]; 2497 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2498 offdiagA[r] = *ba; 2499 offdiagIdx[r] = cmap[0]; 2500 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2501 offdiagA[r] = 0.0; 2502 2503 /* Find first hole in the cmap */ 2504 for (j = 0; j < ncols; j++) { 2505 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2506 if (col > j && j < cstart) { 2507 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2508 break; 2509 } else if (col > j + n && j >= cstart) { 2510 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2511 break; 2512 } 2513 } 2514 if (j == ncols && ncols < A->cmap->N - n) { 2515 /* a hole is outside compressed Bcols */ 2516 if (ncols == 0) { 2517 if (cstart) { 2518 offdiagIdx[r] = 0; 2519 } else offdiagIdx[r] = cend; 2520 } else { /* ncols > 0 */ 2521 offdiagIdx[r] = cmap[ncols - 1] + 1; 2522 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2523 } 2524 } 2525 } 2526 2527 for (j = 0; j < ncols; j++) { 2528 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2529 offdiagA[r] = *ba; 2530 offdiagIdx[r] = cmap[*bj]; 2531 } 2532 ba++; 2533 bj++; 2534 } 2535 } 2536 2537 PetscCall(VecGetArrayWrite(v, &a)); 2538 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2539 for (r = 0; r < m; ++r) { 2540 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2541 a[r] = diagA[r]; 2542 if (idx) idx[r] = cstart + diagIdx[r]; 2543 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2544 a[r] = diagA[r]; 2545 if (idx) { 2546 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2547 idx[r] = cstart + diagIdx[r]; 2548 } else idx[r] = offdiagIdx[r]; 2549 } 2550 } 
else { 2551 a[r] = offdiagA[r]; 2552 if (idx) idx[r] = offdiagIdx[r]; 2553 } 2554 } 2555 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2556 PetscCall(VecRestoreArrayWrite(v, &a)); 2557 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2558 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2559 PetscCall(VecDestroy(&diagV)); 2560 PetscCall(VecDestroy(&offdiagV)); 2561 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2562 PetscFunctionReturn(PETSC_SUCCESS); 2563 } 2564 2565 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2566 { 2567 Mat *dummy; 2568 2569 PetscFunctionBegin; 2570 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2571 *newmat = *dummy; 2572 PetscCall(PetscFree(dummy)); 2573 PetscFunctionReturn(PETSC_SUCCESS); 2574 } 2575 2576 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2577 { 2578 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2579 2580 PetscFunctionBegin; 2581 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2582 A->factorerrortype = a->A->factorerrortype; 2583 PetscFunctionReturn(PETSC_SUCCESS); 2584 } 2585 2586 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2587 { 2588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2589 2590 PetscFunctionBegin; 2591 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2592 PetscCall(MatSetRandom(aij->A, rctx)); 2593 if (x->assembled) { 2594 PetscCall(MatSetRandom(aij->B, rctx)); 2595 } else { 2596 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2597 } 2598 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2599 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2600 PetscFunctionReturn(PETSC_SUCCESS); 2601 } 2602 2603 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2604 { 2605 PetscFunctionBegin; 2606 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2607 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2608 PetscFunctionReturn(PETSC_SUCCESS); 2609 } 2610 2611 /*@ 2612 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2613 2614 Not Collective 2615 2616 Input Parameter: 2617 . A - the matrix 2618 2619 Output Parameter: 2620 . 
nz - the number of nonzeros 2621 2622 Level: advanced 2623 2624 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2625 @*/ 2626 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2627 { 2628 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2629 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2630 PetscBool isaij; 2631 2632 PetscFunctionBegin; 2633 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2634 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2635 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2636 PetscFunctionReturn(PETSC_SUCCESS); 2637 } 2638 2639 /*@ 2640 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2641 2642 Collective 2643 2644 Input Parameters: 2645 + A - the matrix 2646 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2647 2648 Level: advanced 2649 2650 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2651 @*/ 2652 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2653 { 2654 PetscFunctionBegin; 2655 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2656 PetscFunctionReturn(PETSC_SUCCESS); 2657 } 2658 2659 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject) 2660 { 2661 PetscBool sc = PETSC_FALSE, flg; 2662 2663 PetscFunctionBegin; 2664 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2665 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2666 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2667 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2668 PetscOptionsHeadEnd(); 2669 PetscFunctionReturn(PETSC_SUCCESS); 2670 } 2671 2672 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2673 { 2674 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2675 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2676 2677 PetscFunctionBegin; 2678 if (!Y->preallocated) { 2679 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2680 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2681 PetscInt nonew = aij->nonew; 2682 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2683 aij->nonew = nonew; 2684 } 2685 PetscCall(MatShift_Basic(Y, a)); 2686 PetscFunctionReturn(PETSC_SUCCESS); 2687 } 2688 2689 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2690 { 2691 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2692 2693 PetscFunctionBegin; 2694 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2695 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2696 if (d) { 2697 PetscInt rstart; 2698 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2699 *d += rstart; 2700 } 2701 PetscFunctionReturn(PETSC_SUCCESS); 2702 } 2703 2704 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2705 { 2706 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2707 2708 PetscFunctionBegin; 2709 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2710 PetscFunctionReturn(PETSC_SUCCESS); 2711 } 2712 2713 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2714 { 2715 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2716 2717 PetscFunctionBegin; 2718 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2719 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2720 PetscFunctionReturn(PETSC_SUCCESS); 2721 } 2722 2723 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2724 MatGetRow_MPIAIJ, 2725 MatRestoreRow_MPIAIJ, 2726 MatMult_MPIAIJ, 2727 /* 4*/ MatMultAdd_MPIAIJ, 2728 MatMultTranspose_MPIAIJ, 2729 MatMultTransposeAdd_MPIAIJ, 2730 NULL, 2731 NULL, 2732 NULL, 2733 /*10*/ NULL, 2734 NULL, 2735 NULL, 2736 MatSOR_MPIAIJ, 2737 MatTranspose_MPIAIJ, 2738 /*15*/ MatGetInfo_MPIAIJ, 2739 MatEqual_MPIAIJ, 2740 MatGetDiagonal_MPIAIJ, 2741 MatDiagonalScale_MPIAIJ, 2742 MatNorm_MPIAIJ, 2743 /*20*/ MatAssemblyBegin_MPIAIJ, 2744 MatAssemblyEnd_MPIAIJ, 2745 MatSetOption_MPIAIJ, 2746 MatZeroEntries_MPIAIJ, 2747 /*24*/ MatZeroRows_MPIAIJ, 2748 NULL, 2749 NULL, 2750 NULL, 2751 NULL, 2752 /*29*/ MatSetUp_MPI_Hash, 2753 NULL, 2754 NULL, 2755 MatGetDiagonalBlock_MPIAIJ, 2756 NULL, 2757 /*34*/ MatDuplicate_MPIAIJ, 2758 NULL, 2759 NULL, 2760 NULL, 2761 NULL, 2762 /*39*/ MatAXPY_MPIAIJ, 2763 MatCreateSubMatrices_MPIAIJ, 2764 MatIncreaseOverlap_MPIAIJ, 2765 MatGetValues_MPIAIJ, 2766 MatCopy_MPIAIJ, 2767 /*44*/ MatGetRowMax_MPIAIJ, 2768 MatScale_MPIAIJ, 2769 MatShift_MPIAIJ, 2770 MatDiagonalSet_MPIAIJ, 2771 MatZeroRowsColumns_MPIAIJ, 2772 /*49*/ MatSetRandom_MPIAIJ, 2773 MatGetRowIJ_MPIAIJ, 2774 MatRestoreRowIJ_MPIAIJ, 2775 NULL, 2776 NULL, 2777 /*54*/ MatFDColoringCreate_MPIXAIJ, 2778 NULL, 2779 MatSetUnfactored_MPIAIJ, 2780 MatPermute_MPIAIJ, 2781 NULL, 2782 /*59*/ MatCreateSubMatrix_MPIAIJ, 2783 MatDestroy_MPIAIJ, 2784 MatView_MPIAIJ, 2785 NULL, 2786 NULL, 2787 /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2788 NULL, 2789 NULL, 2790 NULL, 2791 MatGetRowMaxAbs_MPIAIJ, 2792 /*69*/ MatGetRowMinAbs_MPIAIJ, 2793 NULL, 2794 NULL, 2795 MatFDColoringApply_AIJ, 2796 MatSetFromOptions_MPIAIJ, 2797 MatFindZeroDiagonals_MPIAIJ, 2798 /*75*/ NULL, 2799 NULL, 2800 NULL, 2801 MatLoad_MPIAIJ, 2802 NULL, 2803 /*80*/ NULL, 2804 NULL, 2805 NULL, 2806 /*83*/ NULL, 2807 NULL, 2808 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2809 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2810 NULL, 2811 NULL, 2812 /*89*/ MatBindToCPU_MPIAIJ, 2813 MatProductSetFromOptions_MPIAIJ, 2814 NULL, 2815 NULL, 2816 
MatConjugate_MPIAIJ, 2817 /*94*/ NULL, 2818 MatSetValuesRow_MPIAIJ, 2819 MatRealPart_MPIAIJ, 2820 MatImaginaryPart_MPIAIJ, 2821 NULL, 2822 /*99*/ NULL, 2823 NULL, 2824 NULL, 2825 MatGetRowMin_MPIAIJ, 2826 NULL, 2827 /*104*/ MatMissingDiagonal_MPIAIJ, 2828 MatGetSeqNonzeroStructure_MPIAIJ, 2829 NULL, 2830 MatGetGhosts_MPIAIJ, 2831 NULL, 2832 /*109*/ NULL, 2833 MatMultDiagonalBlock_MPIAIJ, 2834 NULL, 2835 NULL, 2836 NULL, 2837 /*114*/ MatGetMultiProcBlock_MPIAIJ, 2838 MatFindNonzeroRows_MPIAIJ, 2839 MatGetColumnReductions_MPIAIJ, 2840 MatInvertBlockDiagonal_MPIAIJ, 2841 MatInvertVariableBlockDiagonal_MPIAIJ, 2842 /*119*/ MatCreateSubMatricesMPI_MPIAIJ, 2843 NULL, 2844 NULL, 2845 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2846 NULL, 2847 /*124*/ NULL, 2848 NULL, 2849 NULL, 2850 MatSetBlockSizes_MPIAIJ, 2851 NULL, 2852 /*129*/ MatFDColoringSetUp_MPIXAIJ, 2853 MatFindOffBlockDiagonalEntries_MPIAIJ, 2854 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2855 NULL, 2856 NULL, 2857 /*134*/ NULL, 2858 MatCreateGraph_Simple_AIJ, 2859 NULL, 2860 MatEliminateZeros_MPIAIJ, 2861 MatGetRowSumAbs_MPIAIJ, 2862 /*139*/ NULL, 2863 NULL, 2864 NULL, 2865 MatCopyHashToXAIJ_MPI_Hash, 2866 MatGetCurrentMemType_MPIAIJ}; 2867 2868 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2869 { 2870 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2871 2872 PetscFunctionBegin; 2873 PetscCall(MatStoreValues(aij->A)); 2874 PetscCall(MatStoreValues(aij->B)); 2875 PetscFunctionReturn(PETSC_SUCCESS); 2876 } 2877 2878 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2879 { 2880 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2881 2882 PetscFunctionBegin; 2883 PetscCall(MatRetrieveValues(aij->A)); 2884 PetscCall(MatRetrieveValues(aij->B)); 2885 PetscFunctionReturn(PETSC_SUCCESS); 2886 } 2887 2888 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2889 { 2890 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2891 PetscMPIInt size; 2892 2893 PetscFunctionBegin; 2894 if (B->hash_active) { 2895 B->ops[0] = b->cops; 2896 B->hash_active = PETSC_FALSE; 2897 } 2898 PetscCall(PetscLayoutSetUp(B->rmap)); 2899 PetscCall(PetscLayoutSetUp(B->cmap)); 2900 2901 #if defined(PETSC_USE_CTABLE) 2902 PetscCall(PetscHMapIDestroy(&b->colmap)); 2903 #else 2904 PetscCall(PetscFree(b->colmap)); 2905 #endif 2906 PetscCall(PetscFree(b->garray)); 2907 PetscCall(VecDestroy(&b->lvec)); 2908 PetscCall(VecScatterDestroy(&b->Mvctx)); 2909 2910 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2911 2912 MatSeqXAIJGetOptions_Private(b->B); 2913 PetscCall(MatDestroy(&b->B)); 2914 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2915 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2916 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2917 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2918 MatSeqXAIJRestoreOptions_Private(b->B); 2919 2920 MatSeqXAIJGetOptions_Private(b->A); 2921 PetscCall(MatDestroy(&b->A)); 2922 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2923 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2924 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2925 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2926 MatSeqXAIJRestoreOptions_Private(b->A); 2927 2928 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2929 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2930 B->preallocated = PETSC_TRUE; 2931 B->was_assembled = PETSC_FALSE; 2932 B->assembled = PETSC_FALSE; 2933 PetscFunctionReturn(PETSC_SUCCESS); 2934 } 2935 2936 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2937 { 2938 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2939 PetscBool ondiagreset, offdiagreset, memoryreset; 2940 2941 PetscFunctionBegin; 2942 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2943 PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()"); 2944 if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS); 2945 2946 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset)); 2947 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset)); 2948 memoryreset = (PetscBool)(ondiagreset || offdiagreset); 2949 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B))); 2950 if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS); 2951 2952 PetscCall(PetscLayoutSetUp(B->rmap)); 2953 PetscCall(PetscLayoutSetUp(B->cmap)); 2954 PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled"); 2955 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2956 PetscCall(VecScatterDestroy(&b->Mvctx)); 2957 2958 B->preallocated = PETSC_TRUE; 2959 B->was_assembled = PETSC_FALSE; 2960 B->assembled = PETSC_FALSE; 2961 /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */ 2962 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2963 PetscFunctionReturn(PETSC_SUCCESS); 2964 } 2965 2966 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2967 { 2968 Mat mat; 2969 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2970 2971 PetscFunctionBegin; 2972 *newmat = NULL; 2973 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2974 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2975 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2976 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2977 a = (Mat_MPIAIJ *)mat->data; 2978 2979 mat->factortype = matin->factortype; 2980 mat->assembled = matin->assembled; 2981 mat->insertmode = NOT_SET_VALUES; 2982 2983 a->size = oldmat->size; 2984 a->rank = oldmat->rank; 2985 a->donotstash = oldmat->donotstash; 2986 a->roworiented = oldmat->roworiented; 2987 a->rowindices = NULL; 2988 a->rowvalues = NULL; 2989 a->getrowactive = PETSC_FALSE; 2990 2991 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2992 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2993 if (matin->hash_active) { 2994 PetscCall(MatSetUp(mat)); 2995 } else { 2996 
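    /* matin has regular (non-hash) storage: mirror its preallocation flag, column map, and ghost column list, share the scatter context, and duplicate both local blocks */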
mat->preallocated = matin->preallocated; 2997 if (oldmat->colmap) { 2998 #if defined(PETSC_USE_CTABLE) 2999 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3000 #else 3001 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3002 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3003 #endif 3004 } else a->colmap = NULL; 3005 if (oldmat->garray) { 3006 PetscInt len; 3007 len = oldmat->B->cmap->n; 3008 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3009 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3010 } else a->garray = NULL; 3011 3012 /* It may happen MatDuplicate is called with a non-assembled matrix 3013 In fact, MatDuplicate only requires the matrix to be preallocated 3014 This may happen inside a DMCreateMatrix_Shell */ 3015 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3016 if (oldmat->Mvctx) { 3017 a->Mvctx = oldmat->Mvctx; 3018 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3019 } 3020 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3021 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3022 } 3023 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3024 *newmat = mat; 3025 PetscFunctionReturn(PETSC_SUCCESS); 3026 } 3027 3028 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3029 { 3030 PetscBool isbinary, ishdf5; 3031 3032 PetscFunctionBegin; 3033 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3034 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3035 /* force binary viewer to load .info file if it has not yet done so */ 3036 PetscCall(PetscViewerSetUp(viewer)); 3037 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3038 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3039 if (isbinary) { 3040 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3041 } else if (ishdf5) { 3042 #if defined(PETSC_HAVE_HDF5) 3043 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3044 #else 3045 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3046 #endif 3047 } else { 3048 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3049 } 3050 PetscFunctionReturn(PETSC_SUCCESS); 3051 } 3052 3053 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3054 { 3055 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3056 PetscInt *rowidxs, *colidxs; 3057 PetscScalar *matvals; 3058 3059 PetscFunctionBegin; 3060 PetscCall(PetscViewerSetUp(viewer)); 3061 3062 /* read in matrix header */ 3063 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3064 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3065 M = header[1]; 3066 N = header[2]; 3067 nz = header[3]; 3068 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3069 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3070 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3071 3072 /* set block sizes from the viewer's .info file */ 3073 
PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3074 /* set global sizes if not set already */ 3075 if (mat->rmap->N < 0) mat->rmap->N = M; 3076 if (mat->cmap->N < 0) mat->cmap->N = N; 3077 PetscCall(PetscLayoutSetUp(mat->rmap)); 3078 PetscCall(PetscLayoutSetUp(mat->cmap)); 3079 3080 /* check if the matrix sizes are correct */ 3081 PetscCall(MatGetSize(mat, &rows, &cols)); 3082 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3083 3084 /* read in row lengths and build row indices */ 3085 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3086 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3087 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3088 rowidxs[0] = 0; 3089 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3090 if (nz != PETSC_INT_MAX) { 3091 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3092 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3093 } 3094 3095 /* read in column indices and matrix values */ 3096 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3097 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3098 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3099 /* store matrix indices and values */ 3100 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3101 PetscCall(PetscFree(rowidxs)); 3102 PetscCall(PetscFree2(colidxs, matvals)); 3103 PetscFunctionReturn(PETSC_SUCCESS); 3104 } 3105 3106 /* Not scalable because of ISAllGather() unless getting all columns. */ 3107 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3108 { 3109 IS iscol_local; 3110 PetscBool isstride; 3111 PetscMPIInt gisstride = 0; 3112 3113 PetscFunctionBegin; 3114 /* check if we are grabbing all columns*/ 3115 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3116 3117 if (isstride) { 3118 PetscInt start, len, mstart, mlen; 3119 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3120 PetscCall(ISGetLocalSize(iscol, &len)); 3121 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3122 if (mstart == start && mlen - mstart == len) gisstride = 1; 3123 } 3124 3125 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3126 if (gisstride) { 3127 PetscInt N; 3128 PetscCall(MatGetSize(mat, NULL, &N)); 3129 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3130 PetscCall(ISSetIdentity(iscol_local)); 3131 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3132 } else { 3133 PetscInt cbs; 3134 PetscCall(ISGetBlockSize(iscol, &cbs)); 3135 PetscCall(ISAllGather(iscol, &iscol_local)); 3136 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3137 } 3138 3139 *isseq = iscol_local; 3140 PetscFunctionReturn(PETSC_SUCCESS); 3141 } 3142 3143 /* 3144 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3145 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3146 3147 Input Parameters: 3148 + mat - matrix 3149 . 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3150 i.e., mat->rstart <= isrow[i] < mat->rend 3151 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3152 i.e., mat->cstart <= iscol[i] < mat->cend 3153 3154 Output Parameters: 3155 + isrow_d - sequential row index set for retrieving mat->A 3156 . iscol_d - sequential column index set for retrieving mat->A 3157 . iscol_o - sequential column index set for retrieving mat->B 3158 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3159 */ 3160 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3161 { 3162 Vec x, cmap; 3163 const PetscInt *is_idx; 3164 PetscScalar *xarray, *cmaparray; 3165 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3167 Mat B = a->B; 3168 Vec lvec = a->lvec, lcmap; 3169 PetscInt i, cstart, cend, Bn = B->cmap->N; 3170 MPI_Comm comm; 3171 VecScatter Mvctx = a->Mvctx; 3172 3173 PetscFunctionBegin; 3174 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3175 PetscCall(ISGetLocalSize(iscol, &ncols)); 3176 3177 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3178 PetscCall(MatCreateVecs(mat, &x, NULL)); 3179 PetscCall(VecSet(x, -1.0)); 3180 PetscCall(VecDuplicate(x, &cmap)); 3181 PetscCall(VecSet(cmap, -1.0)); 3182 3183 /* Get start indices */ 3184 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3185 isstart -= ncols; 3186 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3187 3188 PetscCall(ISGetIndices(iscol, &is_idx)); 3189 PetscCall(VecGetArray(x, &xarray)); 3190 PetscCall(VecGetArray(cmap, &cmaparray)); 3191 PetscCall(PetscMalloc1(ncols, &idx)); 3192 for (i = 0; i < ncols; i++) { 3193 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3194 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3195 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3196 } 3197 PetscCall(VecRestoreArray(x, &xarray)); 3198 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3199 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3200 3201 /* Get iscol_d */ 3202 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3203 PetscCall(ISGetBlockSize(iscol, &i)); 3204 PetscCall(ISSetBlockSize(*iscol_d, i)); 3205 3206 /* Get isrow_d */ 3207 PetscCall(ISGetLocalSize(isrow, &m)); 3208 rstart = mat->rmap->rstart; 3209 PetscCall(PetscMalloc1(m, &idx)); 3210 PetscCall(ISGetIndices(isrow, &is_idx)); 3211 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3212 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3213 3214 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3215 PetscCall(ISGetBlockSize(isrow, &i)); 3216 PetscCall(ISSetBlockSize(*isrow_d, i)); 3217 3218 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3219 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3220 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3221 3222 PetscCall(VecDuplicate(lvec, &lcmap)); 3223 3224 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3225 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3226 3227 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3228 /* off-process column 
indices */ 3229 count = 0; 3230 PetscCall(PetscMalloc1(Bn, &idx)); 3231 PetscCall(PetscMalloc1(Bn, &cmap1)); 3232 3233 PetscCall(VecGetArray(lvec, &xarray)); 3234 PetscCall(VecGetArray(lcmap, &cmaparray)); 3235 for (i = 0; i < Bn; i++) { 3236 if (PetscRealPart(xarray[i]) > -1.0) { 3237 idx[count] = i; /* local column index in off-diagonal part B */ 3238 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3239 count++; 3240 } 3241 } 3242 PetscCall(VecRestoreArray(lvec, &xarray)); 3243 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3244 3245 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3246 /* cannot ensure iscol_o has same blocksize as iscol! */ 3247 3248 PetscCall(PetscFree(idx)); 3249 *garray = cmap1; 3250 3251 PetscCall(VecDestroy(&x)); 3252 PetscCall(VecDestroy(&cmap)); 3253 PetscCall(VecDestroy(&lcmap)); 3254 PetscFunctionReturn(PETSC_SUCCESS); 3255 } 3256 3257 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3258 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3259 { 3260 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3261 Mat M = NULL; 3262 MPI_Comm comm; 3263 IS iscol_d, isrow_d, iscol_o; 3264 Mat Asub = NULL, Bsub = NULL; 3265 PetscInt n, count, M_size, N_size; 3266 3267 PetscFunctionBegin; 3268 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3269 3270 if (call == MAT_REUSE_MATRIX) { 3271 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3272 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3273 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3274 3275 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3276 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3277 3278 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3279 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3280 3281 /* Update diagonal and off-diagonal portions of submat */ 3282 asub = (Mat_MPIAIJ *)(*submat)->data; 3283 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3284 PetscCall(ISGetLocalSize(iscol_o, &n)); 3285 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3286 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3287 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3288 3289 } else { /* call == MAT_INITIAL_MATRIX) */ 3290 PetscInt *garray, *garray_compact; 3291 PetscInt BsubN; 3292 3293 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3294 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3295 3296 /* Create local submatrices Asub and Bsub */ 3297 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3298 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3299 3300 // Compact garray so its not of size Bn 3301 PetscCall(ISGetSize(iscol_o, &count)); 3302 PetscCall(PetscMalloc1(count, &garray_compact)); 3303 PetscCall(PetscArraycpy(garray_compact, garray, count)); 3304 3305 /* Create submatrix M */ 3306 PetscCall(ISGetSize(isrow, &M_size)); 3307 PetscCall(ISGetSize(iscol, &N_size)); 3308 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M)); 3309 3310 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3311 asub = (Mat_MPIAIJ *)M->data; 3312 3313 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3314 n = asub->B->cmap->N; 3315 if (BsubN > n) { 3316 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3317 const PetscInt *idx; 3318 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3319 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3320 3321 PetscCall(PetscMalloc1(n, &idx_new)); 3322 j = 0; 3323 PetscCall(ISGetIndices(iscol_o, &idx)); 3324 for (i = 0; i < n; i++) { 3325 if (j >= BsubN) break; 3326 while (subgarray[i] > garray[j]) j++; 3327 3328 PetscCheck(subgarray[i] == garray[j], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3329 idx_new[i] = idx[j++]; 3330 } 3331 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3332 3333 PetscCall(ISDestroy(&iscol_o)); 3334 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3335 3336 } else PetscCheck(BsubN >= n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3337 3338 PetscCall(PetscFree(garray)); 3339 *submat = M; 3340 3341 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3342 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3343 PetscCall(ISDestroy(&isrow_d)); 3344 3345 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3346 PetscCall(ISDestroy(&iscol_d)); 3347 3348 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3349 PetscCall(ISDestroy(&iscol_o)); 3350 } 3351 PetscFunctionReturn(PETSC_SUCCESS); 3352 } 3353 3354 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3355 { 3356 IS iscol_local = NULL, isrow_d; 3357 PetscInt csize; 3358 PetscInt n, i, j, start, end; 3359 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3360 MPI_Comm comm; 3361 3362 PetscFunctionBegin; 3363 /* If isrow has same processor distribution as mat, 3364 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3365 if (call == MAT_REUSE_MATRIX) { 3366 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3367 if (isrow_d) { 3368 sameRowDist = PETSC_TRUE; 3369 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3370 } else { 3371 
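      /* "isrow_d" was not composed on the reused matrix, so it was not created by the SameRowColDist
         path; check for "SubIScol", which MatCreateSubMatrix_MPIAIJ_SameRowDist() composes instead */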
PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3372 if (iscol_local) { 3373 sameRowDist = PETSC_TRUE; 3374 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3375 } 3376 } 3377 } else { 3378 /* Check if isrow has same processor distribution as mat */ 3379 sameDist[0] = PETSC_FALSE; 3380 PetscCall(ISGetLocalSize(isrow, &n)); 3381 if (!n) { 3382 sameDist[0] = PETSC_TRUE; 3383 } else { 3384 PetscCall(ISGetMinMax(isrow, &i, &j)); 3385 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3386 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3387 } 3388 3389 /* Check if iscol has same processor distribution as mat */ 3390 sameDist[1] = PETSC_FALSE; 3391 PetscCall(ISGetLocalSize(iscol, &n)); 3392 if (!n) { 3393 sameDist[1] = PETSC_TRUE; 3394 } else { 3395 PetscCall(ISGetMinMax(iscol, &i, &j)); 3396 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3397 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3398 } 3399 3400 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3401 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3402 sameRowDist = tsameDist[0]; 3403 } 3404 3405 if (sameRowDist) { 3406 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3407 /* isrow and iscol have same processor distribution as mat */ 3408 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3409 PetscFunctionReturn(PETSC_SUCCESS); 3410 } else { /* sameRowDist */ 3411 /* isrow has same processor distribution as mat */ 3412 if (call == MAT_INITIAL_MATRIX) { 3413 PetscBool sorted; 3414 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3415 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3416 PetscCall(ISGetSize(iscol, &i)); 3417 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3418 3419 PetscCall(ISSorted(iscol_local, &sorted)); 3420 if (sorted) { 3421 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3422 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3423 PetscFunctionReturn(PETSC_SUCCESS); 3424 } 3425 } else { /* call == MAT_REUSE_MATRIX */ 3426 IS iscol_sub; 3427 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3428 if (iscol_sub) { 3429 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3430 PetscFunctionReturn(PETSC_SUCCESS); 3431 } 3432 } 3433 } 3434 } 3435 3436 /* General case: iscol -> iscol_local which has global size of iscol */ 3437 if (call == MAT_REUSE_MATRIX) { 3438 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3439 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3440 } else { 3441 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3442 } 3443 3444 PetscCall(ISGetLocalSize(iscol, &csize)); 3445 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3446 3447 if (call == MAT_INITIAL_MATRIX) { 3448 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3449 PetscCall(ISDestroy(&iscol_local)); 3450 } 3451 PetscFunctionReturn(PETSC_SUCCESS); 3452 } 3453 3454 /*@C 3455 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using 
`MATSEQAIJ` matrices that contain the "diagonal" 3456 and "off-diagonal" part of the matrix in CSR format. 3457 3458 Collective 3459 3460 Input Parameters: 3461 + comm - MPI communicator 3462 . M - the global row size 3463 . N - the global column size 3464 . A - "diagonal" portion of matrix 3465 . B - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray 3466 - garray - either `NULL` or the global index of `B` columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter. 3467 3468 Output Parameter: 3469 . mat - the matrix, with input `A` as its local diagonal matrix 3470 3471 Level: advanced 3472 3473 Notes: 3474 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3475 3476 `A` and `B` becomes part of output mat. The user cannot use `A` and `B` anymore. 3477 3478 If `garray` is `NULL`, `B` will be compacted to use local indices. In this sense, `B`'s sparsity pattern (nonzerostate) will be changed. If `B` is a device matrix, we need to somehow also update 3479 `B`'s copy on device. We do so by increasing `B`'s nonzerostate. In use of `B` on device, device matrix types should detect this change (ref. internal routines `MatSeqAIJCUSPARSECopyToGPU()` or 3480 `MatAssemblyEnd_SeqAIJKokkos()`) and will just destroy and then recreate the device copy of `B`. It is not optimal, but is easy to implement and less hacky. To avoid this overhead, try to compute `garray` 3481 yourself, see algorithms in the private function `MatSetUpMultiply_MPIAIJ()`. 3482 3483 The `NULL`-ness of `garray` doesn't need to be collective, in other words, `garray` can be `NULL` on some processes while not on others. 3484 3485 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3486 @*/ 3487 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat) 3488 { 3489 PetscInt m, n; 3490 MatType mpi_mat_type; 3491 Mat_MPIAIJ *mpiaij; 3492 Mat C; 3493 3494 PetscFunctionBegin; 3495 PetscCall(MatCreate(comm, &C)); 3496 PetscCall(MatGetSize(A, &m, &n)); 3497 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3498 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3499 3500 PetscCall(MatSetSizes(C, m, n, M, N)); 3501 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3502 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3503 PetscCall(MatSetType(C, mpi_mat_type)); 3504 if (!garray) { 3505 const PetscScalar *ba; 3506 3507 B->nonzerostate++; 3508 PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */ 3509 PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 3510 } 3511 3512 PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs)); 3513 PetscCall(PetscLayoutSetUp(C->rmap)); 3514 PetscCall(PetscLayoutSetUp(C->cmap)); 3515 3516 mpiaij = (Mat_MPIAIJ *)C->data; 3517 mpiaij->A = A; 3518 mpiaij->B = B; 3519 mpiaij->garray = garray; 3520 C->preallocated = PETSC_TRUE; 3521 C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. 
In effect, making MatAssemblyBegin a nop */ 3522 3523 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3524 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3525 /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and 3526 also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced 3527 */ 3528 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3529 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3530 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3531 *mat = C; 3532 PetscFunctionReturn(PETSC_SUCCESS); 3533 } 3534 3535 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3536 3537 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3538 { 3539 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3540 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3541 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3542 Mat M, Msub, B = a->B; 3543 MatScalar *aa; 3544 Mat_SeqAIJ *aij; 3545 PetscInt *garray = a->garray, *colsub, Ncols; 3546 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3547 IS iscol_sub, iscmap; 3548 const PetscInt *is_idx, *cmap; 3549 PetscBool allcolumns = PETSC_FALSE; 3550 MPI_Comm comm; 3551 3552 PetscFunctionBegin; 3553 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3554 if (call == MAT_REUSE_MATRIX) { 3555 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3556 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3557 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3558 3559 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3560 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3561 3562 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3563 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3564 3565 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3566 3567 } else { /* call == MAT_INITIAL_MATRIX) */ 3568 PetscBool flg; 3569 3570 PetscCall(ISGetLocalSize(iscol, &n)); 3571 PetscCall(ISGetSize(iscol, &Ncols)); 3572 3573 /* (1) iscol -> nonscalable iscol_local */ 3574 /* Check for special case: each processor gets entire matrix columns */ 3575 PetscCall(ISIdentity(iscol_local, &flg)); 3576 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3577 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3578 if (allcolumns) { 3579 iscol_sub = iscol_local; 3580 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3581 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3582 3583 } else { 3584 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3585 PetscInt *idx, *cmap1, k; 3586 PetscCall(PetscMalloc1(Ncols, &idx)); 3587 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3588 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3589 count = 0; 3590 k = 0; 3591 for (i = 0; i < Ncols; i++) { 3592 j = is_idx[i]; 3593 if (j >= cstart && j < cend) { 3594 /* diagonal part of mat */ 3595 idx[count] = j; 3596 cmap1[count++] = i; /* column index in submat */ 3597 } else if (Bn) { 3598 /* off-diagonal part of mat */ 3599 if (j == garray[k]) { 3600 idx[count] = j; 3601 cmap1[count++] = i; /* column index in submat */ 3602 } else if (j > garray[k]) { 3603 while (j > garray[k] && k < Bn - 1) k++; 3604 if (j == garray[k]) { 3605 idx[count] = j; 3606 cmap1[count++] = i; /* column index in submat */ 3607 } 3608 } 3609 } 3610 } 3611 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3612 3613 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3614 PetscCall(ISGetBlockSize(iscol, &cbs)); 3615 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3616 3617 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3618 } 3619 3620 /* (3) Create sequential Msub */ 3621 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3622 } 3623 3624 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3625 aij = (Mat_SeqAIJ *)Msub->data; 3626 ii = aij->i; 3627 PetscCall(ISGetIndices(iscmap, &cmap)); 3628 3629 /* 3630 m - number of local rows 3631 Ncols - number of columns (same on all processors) 3632 rstart - first row in new global matrix generated 3633 */ 3634 PetscCall(MatGetSize(Msub, &m, NULL)); 3635 3636 if (call == MAT_INITIAL_MATRIX) { 3637 /* (4) Create parallel newmat */ 3638 PetscMPIInt rank, size; 3639 PetscInt csize; 3640 3641 PetscCallMPI(MPI_Comm_size(comm, &size)); 3642 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3643 3644 /* 3645 Determine the number of non-zeros in the diagonal and off-diagonal 3646 portions of the matrix in order to do correct preallocation 3647 */ 3648 3649 /* first get start and end of "diagonal" columns */ 3650 PetscCall(ISGetLocalSize(iscol, &csize)); 3651 if (csize == PETSC_DECIDE) { 3652 PetscCall(ISGetSize(isrow, &mglobal)); 3653 if (mglobal == Ncols) { /* square matrix */ 3654 nlocal = m; 3655 } else { 3656 nlocal = Ncols / size + ((Ncols % size) > rank); 3657 } 3658 } else { 3659 nlocal = csize; 3660 } 3661 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3662 rstart = rend - nlocal; 3663 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3664 3665 /* next, compute all the lengths */ 3666 jj = aij->j; 3667 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3668 olens = dlens + m; 3669 for (i = 0; i < m; i++) { 3670 jend = ii[i + 1] - ii[i]; 3671 olen = 0; 3672 dlen = 0; 3673 for (j = 0; j < jend; j++) { 3674 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3675 else dlen++; 3676 jj++; 3677 } 3678 olens[i] = olen; 3679 dlens[i] = dlen; 3680 } 3681 3682 PetscCall(ISGetBlockSize(isrow, &bs)); 3683 PetscCall(ISGetBlockSize(iscol, &cbs)); 3684 3685 PetscCall(MatCreate(comm, &M)); 3686 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3687 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3688 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3689 
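    /* dlens/olens computed above hold exact per-row nonzero counts for the diagonal and off-diagonal
       blocks of the new parallel matrix, so the scalar d_nz/o_nz arguments of 0 are ignored and the
       MatSetValues_MPIAIJ() loop below should require no additional mallocs */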
PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3690 PetscCall(PetscFree(dlens)); 3691 3692 } else { /* call == MAT_REUSE_MATRIX */ 3693 M = *newmat; 3694 PetscCall(MatGetLocalSize(M, &i, NULL)); 3695 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3696 PetscCall(MatZeroEntries(M)); 3697 /* 3698 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3699 rather than the slower MatSetValues(). 3700 */ 3701 M->was_assembled = PETSC_TRUE; 3702 M->assembled = PETSC_FALSE; 3703 } 3704 3705 /* (5) Set values of Msub to *newmat */ 3706 PetscCall(PetscMalloc1(count, &colsub)); 3707 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3708 3709 jj = aij->j; 3710 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3711 for (i = 0; i < m; i++) { 3712 row = rstart + i; 3713 nz = ii[i + 1] - ii[i]; 3714 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3715 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3716 jj += nz; 3717 aa += nz; 3718 } 3719 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3720 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3721 3722 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3723 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3724 3725 PetscCall(PetscFree(colsub)); 3726 3727 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3728 if (call == MAT_INITIAL_MATRIX) { 3729 *newmat = M; 3730 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3731 PetscCall(MatDestroy(&Msub)); 3732 3733 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3734 PetscCall(ISDestroy(&iscol_sub)); 3735 3736 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3737 PetscCall(ISDestroy(&iscmap)); 3738 3739 if (iscol_local) { 3740 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3741 PetscCall(ISDestroy(&iscol_local)); 3742 } 3743 } 3744 PetscFunctionReturn(PETSC_SUCCESS); 3745 } 3746 3747 /* 3748 Not great since it makes two copies of the submatrix, first an SeqAIJ 3749 in local and then by concatenating the local matrices the end result. 3750 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3751 3752 This requires a sequential iscol with all indices. 
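   Because every rank holds that full iscol, memory use grows with the global number of selected
   columns; hence this routine is the "nonscalable" fallback, used when the SameRowDist or
   SameRowColDist paths cannot be applied.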
3753 */ 3754 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3755 { 3756 PetscMPIInt rank, size; 3757 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3758 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3759 Mat M, Mreuse; 3760 MatScalar *aa, *vwork; 3761 MPI_Comm comm; 3762 Mat_SeqAIJ *aij; 3763 PetscBool colflag, allcolumns = PETSC_FALSE; 3764 3765 PetscFunctionBegin; 3766 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3767 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3768 PetscCallMPI(MPI_Comm_size(comm, &size)); 3769 3770 /* Check for special case: each processor gets entire matrix columns */ 3771 PetscCall(ISIdentity(iscol, &colflag)); 3772 PetscCall(ISGetLocalSize(iscol, &n)); 3773 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3774 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3775 3776 if (call == MAT_REUSE_MATRIX) { 3777 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3778 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3779 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3780 } else { 3781 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3782 } 3783 3784 /* 3785 m - number of local rows 3786 n - number of columns (same on all processors) 3787 rstart - first row in new global matrix generated 3788 */ 3789 PetscCall(MatGetSize(Mreuse, &m, &n)); 3790 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3791 if (call == MAT_INITIAL_MATRIX) { 3792 aij = (Mat_SeqAIJ *)Mreuse->data; 3793 ii = aij->i; 3794 jj = aij->j; 3795 3796 /* 3797 Determine the number of non-zeros in the diagonal and off-diagonal 3798 portions of the matrix in order to do correct preallocation 3799 */ 3800 3801 /* first get start and end of "diagonal" columns */ 3802 if (csize == PETSC_DECIDE) { 3803 PetscCall(ISGetSize(isrow, &mglobal)); 3804 if (mglobal == n) { /* square matrix */ 3805 nlocal = m; 3806 } else { 3807 nlocal = n / size + ((n % size) > rank); 3808 } 3809 } else { 3810 nlocal = csize; 3811 } 3812 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3813 rstart = rend - nlocal; 3814 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3815 3816 /* next, compute all the lengths */ 3817 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3818 olens = dlens + m; 3819 for (i = 0; i < m; i++) { 3820 jend = ii[i + 1] - ii[i]; 3821 olen = 0; 3822 dlen = 0; 3823 for (j = 0; j < jend; j++) { 3824 if (*jj < rstart || *jj >= rend) olen++; 3825 else dlen++; 3826 jj++; 3827 } 3828 olens[i] = olen; 3829 dlens[i] = dlen; 3830 } 3831 PetscCall(MatCreate(comm, &M)); 3832 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3833 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3834 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3835 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3836 PetscCall(PetscFree(dlens)); 3837 } else { 3838 PetscInt ml, nl; 3839 3840 M = *newmat; 3841 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3842 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3843 PetscCall(MatZeroEntries(M)); 3844 /* 3845 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3846 rather than the slower MatSetValues(). 3847 */ 3848 M->was_assembled = PETSC_TRUE; 3849 M->assembled = PETSC_FALSE; 3850 } 3851 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3852 aij = (Mat_SeqAIJ *)Mreuse->data; 3853 ii = aij->i; 3854 jj = aij->j; 3855 3856 /* trigger copy to CPU if needed */ 3857 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3858 for (i = 0; i < m; i++) { 3859 row = rstart + i; 3860 nz = ii[i + 1] - ii[i]; 3861 cwork = jj; 3862 jj = PetscSafePointerPlusOffset(jj, nz); 3863 vwork = aa; 3864 aa = PetscSafePointerPlusOffset(aa, nz); 3865 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3866 } 3867 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3868 3869 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3870 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3871 *newmat = M; 3872 3873 /* save submatrix used in processor for next request */ 3874 if (call == MAT_INITIAL_MATRIX) { 3875 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3876 PetscCall(MatDestroy(&Mreuse)); 3877 } 3878 PetscFunctionReturn(PETSC_SUCCESS); 3879 } 3880 3881 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3882 { 3883 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3884 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3885 const PetscInt *JJ; 3886 PetscBool nooffprocentries; 3887 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3888 3889 PetscFunctionBegin; 3890 PetscCall(PetscLayoutSetUp(B->rmap)); 3891 PetscCall(PetscLayoutSetUp(B->cmap)); 3892 m = B->rmap->n; 3893 cstart = B->cmap->rstart; 3894 cend = B->cmap->rend; 3895 rstart = B->rmap->rstart; 3896 irstart = Ii[0]; 3897 3898 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3899 3900 if (PetscDefined(USE_DEBUG)) { 3901 for (i = 0; i < m; i++) { 3902 nnz = Ii[i + 1] - Ii[i]; 3903 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3904 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3905 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3906 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3907 } 3908 } 3909 3910 for (i = 0; i < m; i++) { 3911 nnz = Ii[i + 1] - Ii[i]; 3912 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3913 nnz_max = PetscMax(nnz_max, nnz); 3914 d = 0; 3915 for (j = 0; j < nnz; j++) { 3916 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3917 } 3918 d_nnz[i] = d; 3919 o_nnz[i] = nnz - d; 3920 } 3921 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3922 PetscCall(PetscFree2(d_nnz, o_nnz)); 3923 3924 for (i = 0; i < m; i++) { 3925 ii = i + rstart; 3926 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3927 } 3928 nooffprocentries = B->nooffprocentries; 3929 B->nooffprocentries = PETSC_TRUE; 3930 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3931 
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3932 B->nooffprocentries = nooffprocentries; 3933 3934 /* count number of entries below block diagonal */ 3935 PetscCall(PetscFree(Aij->ld)); 3936 PetscCall(PetscCalloc1(m, &ld)); 3937 Aij->ld = ld; 3938 for (i = 0; i < m; i++) { 3939 nnz = Ii[i + 1] - Ii[i]; 3940 j = 0; 3941 while (j < nnz && J[j] < cstart) j++; 3942 ld[i] = j; 3943 if (J) J += nnz; 3944 } 3945 3946 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3947 PetscFunctionReturn(PETSC_SUCCESS); 3948 } 3949 3950 /*@ 3951 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3952 (the default parallel PETSc format). 3953 3954 Collective 3955 3956 Input Parameters: 3957 + B - the matrix 3958 . i - the indices into `j` for the start of each local row (indices start with zero) 3959 . j - the column indices for each local row (indices start with zero) 3960 - v - optional values in the matrix 3961 3962 Level: developer 3963 3964 Notes: 3965 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3966 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3967 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3968 3969 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3970 3971 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3972 3973 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3974 3975 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3976 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3977 3978 The format which is used for the sparse matrix input, is equivalent to a 3979 row-major ordering.. i.e for the following matrix, the input data expected is 3980 as shown 3981 .vb 3982 1 0 0 3983 2 0 3 P0 3984 ------- 3985 4 5 6 P1 3986 3987 Process0 [P0] rows_owned=[0,1] 3988 i = {0,1,3} [size = nrow+1 = 2+1] 3989 j = {0,0,2} [size = 3] 3990 v = {1,2,3} [size = 3] 3991 3992 Process1 [P1] rows_owned=[2] 3993 i = {0,3} [size = nrow+1 = 1+1] 3994 j = {0,1,2} [size = 3] 3995 v = {4,5,6} [size = 3] 3996 .ve 3997 3998 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 3999 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4000 @*/ 4001 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4002 { 4003 PetscFunctionBegin; 4004 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4005 PetscFunctionReturn(PETSC_SUCCESS); 4006 } 4007 4008 /*@ 4009 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4010 (the default parallel PETSc format). For good matrix assembly performance 4011 the user should preallocate the matrix storage by setting the parameters 4012 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4013 4014 Collective 4015 4016 Input Parameters: 4017 + B - the matrix 4018 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e., 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e., 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
    proc0  d_nz = 2, o_nz = 2
    proc1  d_nz = 3, o_nz = 2
    proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
    proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
    proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
    proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all of the above values, i.e., 34, and
  hence preallocation is perfect.
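
  For instance, a minimal calling sequence for the example above (the values shown are those proc0
  would pass; each rank supplies its own local sizes and arrays, and error checking plus the
  MatSetValues()/assembly loop are omitted) might look like
.vb
    Mat      A;
    PetscInt d_nnz[3] = {2, 2, 2}, o_nnz[3] = {2, 2, 2};

    MatCreate(PETSC_COMM_WORLD, &A);
    MatSetSizes(A, 3, 3, 8, 8);                        /* 3 local rows and columns of the global 8x8 matrix */
    MatSetType(A, MATMPIAIJ);
    MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz);  /* d_nz/o_nz of 0 are ignored because the arrays are given */
    /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve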

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an m x n matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows
  in standard CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
         calculated if `N` is given) For square matrices n is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
.
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4158 . j - global column indices 4159 - a - optional matrix values 4160 4161 Output Parameter: 4162 . mat - the matrix 4163 4164 Level: intermediate 4165 4166 Notes: 4167 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4168 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4169 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4170 4171 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4172 4173 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4174 4175 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4176 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4177 4178 The format which is used for the sparse matrix input, is equivalent to a 4179 row-major ordering, i.e., for the following matrix, the input data expected is 4180 as shown 4181 .vb 4182 1 0 0 4183 2 0 3 P0 4184 ------- 4185 4 5 6 P1 4186 4187 Process0 [P0] rows_owned=[0,1] 4188 i = {0,1,3} [size = nrow+1 = 2+1] 4189 j = {0,0,2} [size = 3] 4190 v = {1,2,3} [size = 3] 4191 4192 Process1 [P1] rows_owned=[2] 4193 i = {0,3} [size = nrow+1 = 1+1] 4194 j = {0,1,2} [size = 3] 4195 v = {4,5,6} [size = 3] 4196 .ve 4197 4198 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4199 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4200 @*/ 4201 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4202 { 4203 PetscFunctionBegin; 4204 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4205 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4206 PetscCall(MatCreate(comm, mat)); 4207 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4208 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4209 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4210 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4211 PetscFunctionReturn(PETSC_SUCCESS); 4212 } 4213 4214 /*@ 4215 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4216 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4217 from `MatCreateMPIAIJWithArrays()` 4218 4219 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4220 4221 Collective 4222 4223 Input Parameters: 4224 + mat - the matrix 4225 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4226 . n - This value should be the same as the local size used in creating the 4227 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4228 calculated if N is given) For square matrices n is almost always m. 4229 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4230 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4231 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4232 . J - column indices 4233 - v - matrix values 4234 4235 Level: deprecated 4236 4237 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4238 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4239 @*/ 4240 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4241 { 4242 PetscInt nnz, i; 4243 PetscBool nooffprocentries; 4244 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4245 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4246 PetscScalar *ad, *ao; 4247 PetscInt ldi, Iii, md; 4248 const PetscInt *Adi = Ad->i; 4249 PetscInt *ld = Aij->ld; 4250 4251 PetscFunctionBegin; 4252 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4253 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4254 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4255 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4256 4257 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4258 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4259 4260 for (i = 0; i < m; i++) { 4261 if (PetscDefined(USE_DEBUG)) { 4262 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4263 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4264 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4265 } 4266 } 4267 nnz = Ii[i + 1] - Ii[i]; 4268 Iii = Ii[i]; 4269 ldi = ld[i]; 4270 md = Adi[i + 1] - Adi[i]; 4271 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4272 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4273 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4274 ad += md; 4275 ao += nnz - md; 4276 } 4277 nooffprocentries = mat->nooffprocentries; 4278 mat->nooffprocentries = PETSC_TRUE; 4279 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4280 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4281 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4282 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4283 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4284 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4285 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4286 mat->nooffprocentries = nooffprocentries; 4287 PetscFunctionReturn(PETSC_SUCCESS); 4288 } 4289 4290 /*@ 4291 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4292 4293 Collective 4294 4295 Input Parameters: 4296 + mat - the matrix 4297 - v - matrix values, stored by row 4298 4299 Level: intermediate 4300 4301 Notes: 4302 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4303 4304 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4305 4306 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4307 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4308 @*/ 4309 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4310 { 4311 PetscInt nnz, i, m; 4312 PetscBool nooffprocentries; 4313 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4314 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4315 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4316 PetscScalar *ad, *ao; 4317 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4318 PetscInt ldi, Iii, md; 4319 PetscInt *ld = Aij->ld; 4320 4321 PetscFunctionBegin; 4322 m = mat->rmap->n; 4323 4324 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4325 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4326 Iii = 0; 4327 for (i = 0; i < m; i++) { 4328 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4329 ldi = ld[i]; 4330 md = Adi[i + 1] - Adi[i]; 4331 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4332 ad += md; 4333 if (ao) { 4334 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4335 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4336 ao += nnz - md; 4337 } 4338 Iii += nnz; 4339 } 4340 nooffprocentries = mat->nooffprocentries; 4341 mat->nooffprocentries = PETSC_TRUE; 4342 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4343 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4344 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4345 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4346 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4347 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4348 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4349 mat->nooffprocentries = nooffprocentries; 4350 PetscFunctionReturn(PETSC_SUCCESS); 4351 } 4352 4353 /*@ 4354 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4355 (the default parallel PETSc format). For good matrix assembly performance 4356 the user should preallocate the matrix storage by setting the parameters 4357 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4358 4359 Collective 4360 4361 Input Parameters: 4362 + comm - MPI communicator 4363 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4364 This value should be the same as the local size used in creating the 4365 y vector for the matrix-vector product y = Ax. 4366 . n - This value should be the same as the local size used in creating the 4367 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4368 calculated if N is given) For square matrices n is almost always m. 4369 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4370 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4371 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4372 (same value is used for all local rows) 4373 . d_nnz - array containing the number of nonzeros in the various rows of the 4374 DIAGONAL portion of the local submatrix (possibly different for each row) 4375 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4376 The size of this array is equal to the number of local rows, i.e 'm'. 4377 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4378 submatrix (same value is used for all local rows). 4379 - o_nnz - array containing the number of nonzeros in the various rows of the 4380 OFF-DIAGONAL portion of the local submatrix (possibly different for 4381 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4382 structure. The size of this array is equal to the number 4383 of local rows, i.e., 'm'. 4384 4385 Output Parameter: 4386 . A - the matrix 4387 4388 Options Database Keys: 4389 + -mat_no_inode - Do not use inodes 4390 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4391 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4392 See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4393 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4394 4395 Level: intermediate 4396 4397 Notes: 4398 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4399 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4400 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] A minimal sketch of this paradigm is given below. 4401 4402 If the *_nnz parameter is given then the *_nz parameter is ignored. 4403 4404 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4405 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4406 storage requirements for this matrix. 4407 4408 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4409 processor then it must be used on all processors that share the object for 4410 that argument. 4411 4412 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4413 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4414 4415 The user MUST specify either the local or global matrix dimensions 4416 (possibly both). 4417 4418 The parallel matrix is partitioned across processors such that the 4419 first `m0` rows belong to process 0, the next `m1` rows belong to 4420 process 1, the next `m2` rows belong to process 2, etc., where 4421 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. That is, each MPI process stores 4422 values corresponding to an [m x N] submatrix. 4423 4424 The columns are logically partitioned with the first n0 columns belonging 4425 to the 0th partition, the next n1 columns belonging to the next 4426 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4427 4428 The DIAGONAL portion of the local submatrix on any given processor 4429 is the submatrix corresponding to the rows and columns m,n 4430 owned by that processor, i.e., the diagonal submatrix on 4431 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 4432 etc. The remaining portion of the local submatrix [m x (N-n)] 4433 constitutes the OFF-DIAGONAL portion. The example below better 4434 illustrates this concept. The two matrices, the DIAGONAL portion and 4435 the OFF-DIAGONAL portion, are each stored as `MATSEQAIJ` matrices.
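   A minimal sketch of the recommended `MatCreate()` based paradigm follows; the communicator, sizes, and preallocation arrays are illustrative, and the preallocation call that does not match the final matrix type is simply ignored
.vb
     Mat A;

     MatCreate(comm, &A);
     MatSetSizes(A, m, n, M, N);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A, 0, d_nnz);
     MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz);
.ve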
4436 4437 For a square global matrix we define each processor's diagonal portion 4438 to be its local rows and the corresponding columns (a square submatrix); 4439 each processor's off-diagonal portion encompasses the remainder of the 4440 local matrix (a rectangular submatrix). 4441 4442 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4443 4444 When calling this routine with a single process communicator, a matrix of 4445 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4446 type of communicator, use the construction mechanism 4447 .vb 4448 MatCreate(..., &A); 4449 MatSetType(A, MATMPIAIJ); 4450 MatSetSizes(A, m, n, M, N); 4451 MatMPIAIJSetPreallocation(A, ...); 4452 .ve 4453 4454 By default, this format uses inodes (identical nodes) when possible. 4455 We search for consecutive rows with the same nonzero structure, thereby 4456 reusing matrix information to achieve increased efficiency. 4457 4458 Example Usage: 4459 Consider the following 8x8 matrix with 34 non-zero values that is 4460 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4461 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4462 as follows 4463 4464 .vb 4465 1 2 0 | 0 3 0 | 0 4 4466 Proc0 0 5 6 | 7 0 0 | 8 0 4467 9 0 10 | 11 0 0 | 12 0 4468 ------------------------------------- 4469 13 0 14 | 15 16 17 | 0 0 4470 Proc1 0 18 0 | 19 20 21 | 0 0 4471 0 0 0 | 22 23 0 | 24 0 4472 ------------------------------------- 4473 Proc2 25 26 27 | 0 0 28 | 29 0 4474 30 0 0 | 31 32 33 | 0 34 4475 .ve 4476 4477 This can be represented as a collection of submatrices as 4478 4479 .vb 4480 A B C 4481 D E F 4482 G H I 4483 .ve 4484 4485 Here the submatrices A,B,C are owned by proc0, D,E,F are 4486 owned by proc1, and G,H,I are owned by proc2. 4487 4488 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4489 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4490 The 'M','N' parameters are 8,8, and have the same values on all procs. 4491 4492 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4493 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4494 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4495 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4496 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4497 matrix, and [DF] as another `MATSEQAIJ` matrix. 4498 4499 When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4500 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4501 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4502 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4503 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4504 In this case, the values of `d_nz`,`o_nz` are 4505 .vb 4506 proc0 d_nz = 2, o_nz = 2 4507 proc1 d_nz = 3, o_nz = 2 4508 proc2 d_nz = 1, o_nz = 4 4509 .ve 4510 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4511 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4512 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4513 34 values. 4514 4515 When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4516 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4517 In the above case, the values for d_nnz,o_nnz are 4518 .vb 4519 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4520 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4521 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4522 .ve 4523 Here the space allocated is the sum of all the above values, i.e., 34, and 4524 hence the preallocation is perfect. 4525 4526 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4527 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4528 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4529 @*/ 4530 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4531 { 4532 PetscMPIInt size; 4533 4534 PetscFunctionBegin; 4535 PetscCall(MatCreate(comm, A)); 4536 PetscCall(MatSetSizes(*A, m, n, M, N)); 4537 PetscCallMPI(MPI_Comm_size(comm, &size)); 4538 if (size > 1) { 4539 PetscCall(MatSetType(*A, MATMPIAIJ)); 4540 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4541 } else { 4542 PetscCall(MatSetType(*A, MATSEQAIJ)); 4543 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4544 } 4545 PetscFunctionReturn(PETSC_SUCCESS); 4546 } 4547 4548 /*@C 4549 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4550 4551 Not Collective 4552 4553 Input Parameter: 4554 . A - The `MATMPIAIJ` matrix 4555 4556 Output Parameters: 4557 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4558 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4559 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4560 4561 Level: intermediate 4562 4563 Note: 4564 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4565 in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is 4566 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4567 local column numbers to global column numbers in the original matrix.
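   Example Usage:
   A minimal sketch, assuming `A` is an already assembled `MATMPIAIJ` matrix; local column j of `Ao` corresponds to global column colmap[j] of `A`
.vb
     Mat             Ad, Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
.ve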
4568 4569 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4570 @*/ 4571 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4572 { 4573 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4574 PetscBool flg; 4575 4576 PetscFunctionBegin; 4577 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4578 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4579 if (Ad) *Ad = a->A; 4580 if (Ao) *Ao = a->B; 4581 if (colmap) *colmap = a->garray; 4582 PetscFunctionReturn(PETSC_SUCCESS); 4583 } 4584 4585 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4586 { 4587 PetscInt m, N, i, rstart, nnz, Ii; 4588 PetscInt *indx; 4589 PetscScalar *values; 4590 MatType rootType; 4591 4592 PetscFunctionBegin; 4593 PetscCall(MatGetSize(inmat, &m, &N)); 4594 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4595 PetscInt *dnz, *onz, sum, bs, cbs; 4596 4597 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4598 /* Check sum(n) = N */ 4599 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4600 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4601 4602 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4603 rstart -= m; 4604 4605 MatPreallocateBegin(comm, m, n, dnz, onz); 4606 for (i = 0; i < m; i++) { 4607 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4608 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4609 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4610 } 4611 4612 PetscCall(MatCreate(comm, outmat)); 4613 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4614 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4615 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4616 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4617 PetscCall(MatSetType(*outmat, rootType)); 4618 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4619 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4620 MatPreallocateEnd(dnz, onz); 4621 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4622 } 4623 4624 /* numeric phase */ 4625 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4626 for (i = 0; i < m; i++) { 4627 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4628 Ii = i + rstart; 4629 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4630 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4631 } 4632 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4633 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4634 PetscFunctionReturn(PETSC_SUCCESS); 4635 } 4636 4637 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4638 { 4639 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4640 4641 PetscFunctionBegin; 4642 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4643 PetscCall(PetscFree(merge->id_r)); 4644 PetscCall(PetscFree(merge->len_s)); 4645 PetscCall(PetscFree(merge->len_r)); 4646 PetscCall(PetscFree(merge->bi)); 4647 PetscCall(PetscFree(merge->bj)); 4648 PetscCall(PetscFree(merge->buf_ri[0])); 4649 PetscCall(PetscFree(merge->buf_ri)); 4650 PetscCall(PetscFree(merge->buf_rj[0])); 4651 PetscCall(PetscFree(merge->buf_rj)); 4652 
PetscCall(PetscFree(merge->coi)); 4653 PetscCall(PetscFree(merge->coj)); 4654 PetscCall(PetscFree(merge->owners_co)); 4655 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4656 PetscCall(PetscFree(merge)); 4657 PetscFunctionReturn(PETSC_SUCCESS); 4658 } 4659 4660 #include <../src/mat/utils/freespace.h> 4661 #include <petscbt.h> 4662 4663 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4664 { 4665 MPI_Comm comm; 4666 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4667 PetscMPIInt size, rank, taga, *len_s; 4668 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4669 PetscMPIInt proc, k; 4670 PetscInt **buf_ri, **buf_rj; 4671 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4672 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4673 MPI_Request *s_waits, *r_waits; 4674 MPI_Status *status; 4675 const MatScalar *aa, *a_a; 4676 MatScalar **abuf_r, *ba_i; 4677 Mat_Merge_SeqsToMPI *merge; 4678 PetscContainer container; 4679 4680 PetscFunctionBegin; 4681 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4682 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4683 4684 PetscCallMPI(MPI_Comm_size(comm, &size)); 4685 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4686 4687 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4688 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4689 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4690 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4691 aa = a_a; 4692 4693 bi = merge->bi; 4694 bj = merge->bj; 4695 buf_ri = merge->buf_ri; 4696 buf_rj = merge->buf_rj; 4697 4698 PetscCall(PetscMalloc1(size, &status)); 4699 owners = merge->rowmap->range; 4700 len_s = merge->len_s; 4701 4702 /* send and recv matrix values */ 4703 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4704 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4705 4706 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4707 for (proc = 0, k = 0; proc < size; proc++) { 4708 if (!len_s[proc]) continue; 4709 i = owners[proc]; 4710 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4711 k++; 4712 } 4713 4714 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4715 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4716 PetscCall(PetscFree(status)); 4717 4718 PetscCall(PetscFree(s_waits)); 4719 PetscCall(PetscFree(r_waits)); 4720 4721 /* insert mat values of mpimat */ 4722 PetscCall(PetscMalloc1(N, &ba_i)); 4723 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4724 4725 for (k = 0; k < merge->nrecv; k++) { 4726 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4727 nrows = *buf_ri_k[k]; 4728 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4729 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4730 } 4731 4732 /* set values of ba */ 4733 m = merge->rowmap->n; 4734 for (i = 0; i < m; i++) { 4735 arow = owners[rank] + i; 4736 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4737 bnzi = bi[i + 1] - bi[i]; 4738 PetscCall(PetscArrayzero(ba_i, bnzi)); 4739 4740 /* add local non-zero vals of this proc's seqmat into ba */ 4741 anzi = ai[arow + 1] - ai[arow]; 4742 aj = a->j + ai[arow]; 4743 aa = 
a_a + ai[arow]; 4744 nextaj = 0; 4745 for (j = 0; nextaj < anzi; j++) { 4746 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4747 ba_i[j] += aa[nextaj++]; 4748 } 4749 } 4750 4751 /* add received vals into ba */ 4752 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4753 /* i-th row */ 4754 if (i == *nextrow[k]) { 4755 anzi = *(nextai[k] + 1) - *nextai[k]; 4756 aj = buf_rj[k] + *nextai[k]; 4757 aa = abuf_r[k] + *nextai[k]; 4758 nextaj = 0; 4759 for (j = 0; nextaj < anzi; j++) { 4760 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4761 ba_i[j] += aa[nextaj++]; 4762 } 4763 } 4764 nextrow[k]++; 4765 nextai[k]++; 4766 } 4767 } 4768 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4769 } 4770 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4771 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4772 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4773 4774 PetscCall(PetscFree(abuf_r[0])); 4775 PetscCall(PetscFree(abuf_r)); 4776 PetscCall(PetscFree(ba_i)); 4777 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4778 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4779 PetscFunctionReturn(PETSC_SUCCESS); 4780 } 4781 4782 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4783 { 4784 Mat B_mpi; 4785 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4786 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4787 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4788 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4789 PetscInt len, *dnz, *onz, bs, cbs; 4790 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4791 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4792 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4793 MPI_Status *status; 4794 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4795 PetscBT lnkbt; 4796 Mat_Merge_SeqsToMPI *merge; 4797 PetscContainer container; 4798 4799 PetscFunctionBegin; 4800 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4801 4802 /* make sure it is a PETSc comm */ 4803 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4804 PetscCallMPI(MPI_Comm_size(comm, &size)); 4805 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4806 4807 PetscCall(PetscNew(&merge)); 4808 PetscCall(PetscMalloc1(size, &status)); 4809 4810 /* determine row ownership */ 4811 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4812 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4813 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4814 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4815 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4816 PetscCall(PetscMalloc1(size, &len_si)); 4817 PetscCall(PetscMalloc1(size, &merge->len_s)); 4818 4819 m = merge->rowmap->n; 4820 owners = merge->rowmap->range; 4821 4822 /* determine the number of messages to send, their lengths */ 4823 len_s = merge->len_s; 4824 4825 len = 0; /* length of buf_si[] */ 4826 merge->nsend = 0; 4827 for (PetscMPIInt proc = 0; proc < size; proc++) { 4828 len_si[proc] = 0; 4829 if (proc == rank) { 4830 len_s[proc] = 0; 4831 } else { 4832 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4833 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4834 } 4835 if (len_s[proc]) { 4836 merge->nsend++; 4837 nrows = 0; 4838 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4839 if (ai[i + 1] > ai[i]) nrows++; 4840 } 
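      /* each i-structure message carries nrows, the nrows row indices, and the nrows+1 row offsets, i.e. 2*(nrows+1) integers in total */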
4841 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4842 len += len_si[proc]; 4843 } 4844 } 4845 4846 /* determine the number and length of messages to receive for ij-structure */ 4847 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4848 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4849 4850 /* post the Irecv of j-structure */ 4851 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4852 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4853 4854 /* post the Isend of j-structure */ 4855 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4856 4857 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4858 if (!len_s[proc]) continue; 4859 i = owners[proc]; 4860 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4861 k++; 4862 } 4863 4864 /* receives and sends of j-structure are complete */ 4865 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4866 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4867 4868 /* send and recv i-structure */ 4869 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4870 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4871 4872 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4873 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4874 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4875 if (!len_s[proc]) continue; 4876 /* form outgoing message for i-structure: 4877 buf_si[0]: nrows to be sent 4878 [1:nrows]: row index (global) 4879 [nrows+1:2*nrows+1]: i-structure index 4880 */ 4881 nrows = len_si[proc] / 2 - 1; 4882 buf_si_i = buf_si + nrows + 1; 4883 buf_si[0] = nrows; 4884 buf_si_i[0] = 0; 4885 nrows = 0; 4886 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4887 anzi = ai[i + 1] - ai[i]; 4888 if (anzi) { 4889 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4890 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4891 nrows++; 4892 } 4893 } 4894 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4895 k++; 4896 buf_si += len_si[proc]; 4897 } 4898 4899 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4900 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4901 4902 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4903 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4904 4905 PetscCall(PetscFree(len_si)); 4906 PetscCall(PetscFree(len_ri)); 4907 PetscCall(PetscFree(rj_waits)); 4908 PetscCall(PetscFree2(si_waits, sj_waits)); 4909 PetscCall(PetscFree(ri_waits)); 4910 PetscCall(PetscFree(buf_s)); 4911 PetscCall(PetscFree(status)); 4912 4913 /* compute a local seq matrix in each processor */ 4914 /* allocate bi array and free space for accumulating nonzero column info */ 4915 PetscCall(PetscMalloc1(m + 1, &bi)); 4916 bi[0] = 0; 4917 4918 /* create and initialize a linked list */ 4919 nlnk = N + 1; 4920 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4921 4922 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4923 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4924 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4925 4926 
current_space = free_space; 4927 4928 /* determine symbolic info for each local row */ 4929 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4930 4931 for (k = 0; k < merge->nrecv; k++) { 4932 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4933 nrows = *buf_ri_k[k]; 4934 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4935 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4936 } 4937 4938 MatPreallocateBegin(comm, m, n, dnz, onz); 4939 len = 0; 4940 for (i = 0; i < m; i++) { 4941 bnzi = 0; 4942 /* add local non-zero cols of this proc's seqmat into lnk */ 4943 arow = owners[rank] + i; 4944 anzi = ai[arow + 1] - ai[arow]; 4945 aj = a->j + ai[arow]; 4946 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4947 bnzi += nlnk; 4948 /* add received col data into lnk */ 4949 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4950 if (i == *nextrow[k]) { /* i-th row */ 4951 anzi = *(nextai[k] + 1) - *nextai[k]; 4952 aj = buf_rj[k] + *nextai[k]; 4953 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4954 bnzi += nlnk; 4955 nextrow[k]++; 4956 nextai[k]++; 4957 } 4958 } 4959 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4960 4961 /* if free space is not available, make more free space */ 4962 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4963 /* copy data into free space, then initialize lnk */ 4964 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4965 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 4966 4967 current_space->array += bnzi; 4968 current_space->local_used += bnzi; 4969 current_space->local_remaining -= bnzi; 4970 4971 bi[i + 1] = bi[i] + bnzi; 4972 } 4973 4974 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4975 4976 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 4977 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4978 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4979 4980 /* create symbolic parallel matrix B_mpi */ 4981 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4982 PetscCall(MatCreate(comm, &B_mpi)); 4983 if (n == PETSC_DECIDE) { 4984 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4985 } else { 4986 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4987 } 4988 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 4989 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 4990 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 4991 MatPreallocateEnd(dnz, onz); 4992 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 4993 4994 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4995 B_mpi->assembled = PETSC_FALSE; 4996 merge->bi = bi; 4997 merge->bj = bj; 4998 merge->buf_ri = buf_ri; 4999 merge->buf_rj = buf_rj; 5000 merge->coi = NULL; 5001 merge->coj = NULL; 5002 merge->owners_co = NULL; 5003 5004 PetscCall(PetscCommDestroy(&comm)); 5005 5006 /* attach the supporting struct to B_mpi for reuse */ 5007 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5008 PetscCall(PetscContainerSetPointer(container, merge)); 5009 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5010 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5011 PetscCall(PetscContainerDestroy(&container)); 5012 
*mpimat = B_mpi; 5013 5014 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5015 PetscFunctionReturn(PETSC_SUCCESS); 5016 } 5017 5018 /*@ 5019 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5020 matrices from each processor 5021 5022 Collective 5023 5024 Input Parameters: 5025 + comm - the communicators the parallel matrix will live on 5026 . seqmat - the input sequential matrices 5027 . m - number of local rows (or `PETSC_DECIDE`) 5028 . n - number of local columns (or `PETSC_DECIDE`) 5029 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5030 5031 Output Parameter: 5032 . mpimat - the parallel matrix generated 5033 5034 Level: advanced 5035 5036 Note: 5037 The dimensions of the sequential matrix in each processor MUST be the same. 5038 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5039 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5040 5041 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5042 @*/ 5043 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5044 { 5045 PetscMPIInt size; 5046 5047 PetscFunctionBegin; 5048 PetscCallMPI(MPI_Comm_size(comm, &size)); 5049 if (size == 1) { 5050 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5051 if (scall == MAT_INITIAL_MATRIX) { 5052 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5053 } else { 5054 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5055 } 5056 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5057 PetscFunctionReturn(PETSC_SUCCESS); 5058 } 5059 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5060 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5061 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5062 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5063 PetscFunctionReturn(PETSC_SUCCESS); 5064 } 5065 5066 /*@ 5067 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5068 5069 Not Collective 5070 5071 Input Parameter: 5072 . A - the matrix 5073 5074 Output Parameter: 5075 . A_loc - the local sequential matrix generated 5076 5077 Level: developer 5078 5079 Notes: 5080 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5081 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5082 `n` is the global column count obtained with `MatGetSize()` 5083 5084 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5085 5086 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5087 5088 Destroy the matrix with `MatDestroy()` 5089 5090 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5091 @*/ 5092 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5093 { 5094 PetscBool mpi; 5095 5096 PetscFunctionBegin; 5097 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5098 if (mpi) { 5099 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5100 } else { 5101 *A_loc = A; 5102 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5103 } 5104 PetscFunctionReturn(PETSC_SUCCESS); 5105 } 5106 5107 /*@ 5108 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 
5109 5110 Not Collective 5111 5112 Input Parameters: 5113 + A - the matrix 5114 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5115 5116 Output Parameter: 5117 . A_loc - the local sequential matrix generated 5118 5119 Level: developer 5120 5121 Notes: 5122 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5123 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5124 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5125 5126 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5127 5128 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5129 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5130 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5131 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5132 5133 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5134 @*/ 5135 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5136 { 5137 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5138 Mat_SeqAIJ *mat, *a, *b; 5139 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5140 const PetscScalar *aa, *ba, *aav, *bav; 5141 PetscScalar *ca, *cam; 5142 PetscMPIInt size; 5143 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5144 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5145 PetscBool match; 5146 5147 PetscFunctionBegin; 5148 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5149 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5150 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5151 if (size == 1) { 5152 if (scall == MAT_INITIAL_MATRIX) { 5153 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5154 *A_loc = mpimat->A; 5155 } else if (scall == MAT_REUSE_MATRIX) { 5156 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5157 } 5158 PetscFunctionReturn(PETSC_SUCCESS); 5159 } 5160 5161 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5162 a = (Mat_SeqAIJ *)mpimat->A->data; 5163 b = (Mat_SeqAIJ *)mpimat->B->data; 5164 ai = a->i; 5165 aj = a->j; 5166 bi = b->i; 5167 bj = b->j; 5168 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5169 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5170 aa = aav; 5171 ba = bav; 5172 if (scall == MAT_INITIAL_MATRIX) { 5173 PetscCall(PetscMalloc1(1 + am, &ci)); 5174 ci[0] = 0; 5175 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5176 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5177 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5178 k = 0; 5179 for (i = 0; i < am; i++) { 5180 ncols_o = bi[i + 1] - bi[i]; 5181 ncols_d = ai[i + 1] - ai[i]; 5182 /* off-diagonal portion of A */ 5183 for (jo = 0; jo < ncols_o; jo++) { 5184 col = cmap[*bj]; 5185 if (col >= cstart) break; 5186 cj[k] = col; 5187 bj++; 5188 ca[k++] = *ba++; 5189 } 5190 /* diagonal portion of A */ 5191 for (j = 0; j < ncols_d; j++) { 5192 cj[k] = cstart + *aj++; 5193 ca[k++] = 
*aa++; 5194 } 5195 /* off-diagonal portion of A */ 5196 for (j = jo; j < ncols_o; j++) { 5197 cj[k] = cmap[*bj++]; 5198 ca[k++] = *ba++; 5199 } 5200 } 5201 /* put together the new matrix */ 5202 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5203 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5204 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5205 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5206 mat->free_a = PETSC_TRUE; 5207 mat->free_ij = PETSC_TRUE; 5208 mat->nonew = 0; 5209 } else if (scall == MAT_REUSE_MATRIX) { 5210 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5211 ci = mat->i; 5212 cj = mat->j; 5213 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5214 for (i = 0; i < am; i++) { 5215 /* off-diagonal portion of A */ 5216 ncols_o = bi[i + 1] - bi[i]; 5217 for (jo = 0; jo < ncols_o; jo++) { 5218 col = cmap[*bj]; 5219 if (col >= cstart) break; 5220 *cam++ = *ba++; 5221 bj++; 5222 } 5223 /* diagonal portion of A */ 5224 ncols_d = ai[i + 1] - ai[i]; 5225 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5226 /* off-diagonal portion of A */ 5227 for (j = jo; j < ncols_o; j++) { 5228 *cam++ = *ba++; 5229 bj++; 5230 } 5231 } 5232 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5233 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5234 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5235 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5236 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5237 PetscFunctionReturn(PETSC_SUCCESS); 5238 } 5239 5240 /*@ 5241 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5242 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5243 5244 Not Collective 5245 5246 Input Parameters: 5247 + A - the matrix 5248 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5249 5250 Output Parameters: 5251 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5252 - A_loc - the local sequential matrix generated 5253 5254 Level: developer 5255 5256 Note: 5257 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5258 part, then those associated with the off-diagonal part (in its local ordering) 5259 5260 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5261 @*/ 5262 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5263 { 5264 Mat Ao, Ad; 5265 const PetscInt *cmap; 5266 PetscMPIInt size; 5267 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5268 5269 PetscFunctionBegin; 5270 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5271 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5272 if (size == 1) { 5273 if (scall == MAT_INITIAL_MATRIX) { 5274 PetscCall(PetscObjectReference((PetscObject)Ad)); 5275 *A_loc = Ad; 5276 } else if (scall == MAT_REUSE_MATRIX) { 5277 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5278 } 5279 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5280 PetscFunctionReturn(PETSC_SUCCESS); 5281 } 5282 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5283 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5284 if (f) { 5285 PetscCall((*f)(A, scall, glob, A_loc)); 5286 } else { 5287 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5288 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5289 Mat_SeqAIJ *c; 5290 PetscInt *ai = a->i, *aj = a->j; 5291 PetscInt *bi = b->i, *bj = b->j; 5292 PetscInt *ci, *cj; 5293 const PetscScalar *aa, *ba; 5294 PetscScalar *ca; 5295 PetscInt i, j, am, dn, on; 5296 5297 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5298 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5299 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5300 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5301 if (scall == MAT_INITIAL_MATRIX) { 5302 PetscInt k; 5303 PetscCall(PetscMalloc1(1 + am, &ci)); 5304 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5305 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5306 ci[0] = 0; 5307 for (i = 0, k = 0; i < am; i++) { 5308 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5309 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5310 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5311 /* diagonal portion of A */ 5312 for (j = 0; j < ncols_d; j++, k++) { 5313 cj[k] = *aj++; 5314 ca[k] = *aa++; 5315 } 5316 /* off-diagonal portion of A */ 5317 for (j = 0; j < ncols_o; j++, k++) { 5318 cj[k] = dn + *bj++; 5319 ca[k] = *ba++; 5320 } 5321 } 5322 /* put together the new matrix */ 5323 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5324 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5325 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5326 c = (Mat_SeqAIJ *)(*A_loc)->data; 5327 c->free_a = PETSC_TRUE; 5328 c->free_ij = PETSC_TRUE; 5329 c->nonew = 0; 5330 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5331 } else if (scall == MAT_REUSE_MATRIX) { 5332 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5333 for (i = 0; i < am; i++) { 5334 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5335 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5336 /* diagonal portion of A */ 5337 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5338 /* off-diagonal portion of A */ 5339 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5340 } 5341 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5342 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5343 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5344 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5345 if (glob) { 5346 PetscInt cst, *gidx; 5347 5348 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5349 PetscCall(PetscMalloc1(dn + on, &gidx)); 5350 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5351 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5352 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5353 } 5354 } 5355 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5356 PetscFunctionReturn(PETSC_SUCCESS); 5357 } 5358 5359 /*@C 5360 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5361 5362 Not Collective 5363 5364 Input Parameters: 5365 + A - the matrix 5366 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5367 . row - index set of rows to extract (or `NULL`) 5368 - col - index set of columns to extract (or `NULL`) 5369 5370 Output Parameter: 5371 . A_loc - the local sequential matrix generated 5372 5373 Level: developer 5374 5375 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5376 @*/ 5377 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5378 { 5379 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5380 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5381 IS isrowa, iscola; 5382 Mat *aloc; 5383 PetscBool match; 5384 5385 PetscFunctionBegin; 5386 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5387 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5388 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5389 if (!row) { 5390 start = A->rmap->rstart; 5391 end = A->rmap->rend; 5392 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5393 } else { 5394 isrowa = *row; 5395 } 5396 if (!col) { 5397 start = A->cmap->rstart; 5398 cmap = a->garray; 5399 nzA = a->A->cmap->n; 5400 nzB = a->B->cmap->n; 5401 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5402 ncols = 0; 5403 for (i = 0; i < nzB; i++) { 5404 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5405 else break; 5406 } 5407 imark = i; 5408 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5409 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5410 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5411 } else { 5412 iscola = *col; 5413 } 5414 if (scall != MAT_INITIAL_MATRIX) { 5415 PetscCall(PetscMalloc1(1, &aloc)); 5416 aloc[0] = *A_loc; 5417 } 5418 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5419 if (!col) { /* attach global id of condensed columns */ 5420 
PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5421 } 5422 *A_loc = aloc[0]; 5423 PetscCall(PetscFree(aloc)); 5424 if (!row) PetscCall(ISDestroy(&isrowa)); 5425 if (!col) PetscCall(ISDestroy(&iscola)); 5426 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5427 PetscFunctionReturn(PETSC_SUCCESS); 5428 } 5429 5430 /* 5431 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5432 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5433 * on a global size. 5434 * */ 5435 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5436 { 5437 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5438 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5439 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5440 PetscMPIInt owner; 5441 PetscSFNode *iremote, *oiremote; 5442 const PetscInt *lrowindices; 5443 PetscSF sf, osf; 5444 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5445 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5446 MPI_Comm comm; 5447 ISLocalToGlobalMapping mapping; 5448 const PetscScalar *pd_a, *po_a; 5449 5450 PetscFunctionBegin; 5451 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5452 /* plocalsize is the number of roots 5453 * nrows is the number of leaves 5454 * */ 5455 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5456 PetscCall(ISGetLocalSize(rows, &nrows)); 5457 PetscCall(PetscCalloc1(nrows, &iremote)); 5458 PetscCall(ISGetIndices(rows, &lrowindices)); 5459 for (i = 0; i < nrows; i++) { 5460 /* Find a remote index and an owner for a row 5461 * The row could be local or remote 5462 * */ 5463 owner = 0; 5464 lidx = 0; 5465 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5466 iremote[i].index = lidx; 5467 iremote[i].rank = owner; 5468 } 5469 /* Create SF to communicate how many nonzero columns for each row */ 5470 PetscCall(PetscSFCreate(comm, &sf)); 5471 /* SF will figure out the number of nonzero columns for each row, and their 5472 * offsets 5473 * */ 5474 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5475 PetscCall(PetscSFSetFromOptions(sf)); 5476 PetscCall(PetscSFSetUp(sf)); 5477 5478 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5479 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5480 PetscCall(PetscCalloc1(nrows, &pnnz)); 5481 roffsets[0] = 0; 5482 roffsets[1] = 0; 5483 for (i = 0; i < plocalsize; i++) { 5484 /* diagonal */ 5485 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5486 /* off-diagonal */ 5487 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5488 /* compute offsets so that we relative location for each row */ 5489 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5490 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5491 } 5492 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5493 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5494 /* 'r' means root, and 'l' means leaf */ 5495 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5496 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5497 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5498 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5499 PetscCall(PetscSFDestroy(&sf)); 5500 
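  /* each requested row now knows, on the leaf side, how many diagonal and off-diagonal nonzeros it has (nlcols) and where they start in the owner's arrays (loffsets); the root-side copies are no longer needed */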
PetscCall(PetscFree(roffsets)); 5501 PetscCall(PetscFree(nrcols)); 5502 dntotalcols = 0; 5503 ontotalcols = 0; 5504 ncol = 0; 5505 for (i = 0; i < nrows; i++) { 5506 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5507 ncol = PetscMax(pnnz[i], ncol); 5508 /* diagonal */ 5509 dntotalcols += nlcols[i * 2 + 0]; 5510 /* off-diagonal */ 5511 ontotalcols += nlcols[i * 2 + 1]; 5512 } 5513 /* We do not need to figure the right number of columns 5514 * since all the calculations will be done by going through the raw data 5515 * */ 5516 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5517 PetscCall(MatSetUp(*P_oth)); 5518 PetscCall(PetscFree(pnnz)); 5519 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5520 /* diagonal */ 5521 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5522 /* off-diagonal */ 5523 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5524 /* diagonal */ 5525 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5526 /* off-diagonal */ 5527 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5528 dntotalcols = 0; 5529 ontotalcols = 0; 5530 ntotalcols = 0; 5531 for (i = 0; i < nrows; i++) { 5532 owner = 0; 5533 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5534 /* Set iremote for diag matrix */ 5535 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5536 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5537 iremote[dntotalcols].rank = owner; 5538 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5539 ilocal[dntotalcols++] = ntotalcols++; 5540 } 5541 /* off-diagonal */ 5542 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5543 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5544 oiremote[ontotalcols].rank = owner; 5545 oilocal[ontotalcols++] = ntotalcols++; 5546 } 5547 } 5548 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5549 PetscCall(PetscFree(loffsets)); 5550 PetscCall(PetscFree(nlcols)); 5551 PetscCall(PetscSFCreate(comm, &sf)); 5552 /* P serves as roots and P_oth is leaves 5553 * Diag matrix 5554 * */ 5555 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5556 PetscCall(PetscSFSetFromOptions(sf)); 5557 PetscCall(PetscSFSetUp(sf)); 5558 5559 PetscCall(PetscSFCreate(comm, &osf)); 5560 /* off-diagonal */ 5561 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5562 PetscCall(PetscSFSetFromOptions(osf)); 5563 PetscCall(PetscSFSetUp(osf)); 5564 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5565 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5566 /* operate on the matrix internal data to save memory */ 5567 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5568 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5569 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5570 /* Convert to global indices for diag matrix */ 5571 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5572 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5573 /* We want P_oth store global indices */ 5574 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5575 /* Use memory scalable approach */ 5576 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5577 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5578 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5579 
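  /* complete the diagonal-block column-index broadcast while the off-diagonal one is still in flight */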
PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5580 /* Convert back to local indices */ 5581 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5582 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5583 nout = 0; 5584 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5585 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5586 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5587 /* Exchange values */ 5588 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5589 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5590 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5591 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5592 /* Stop PETSc from shrinking memory */ 5593 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5594 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5595 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5596 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5597 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5598 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5599 PetscCall(PetscSFDestroy(&sf)); 5600 PetscCall(PetscSFDestroy(&osf)); 5601 PetscFunctionReturn(PETSC_SUCCESS); 5602 } 5603 5604 /* 5605 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5606 * This supports MPIAIJ and MAIJ 5607 * */ 5608 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5609 { 5610 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5611 Mat_SeqAIJ *p_oth; 5612 IS rows, map; 5613 PetscHMapI hamp; 5614 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5615 MPI_Comm comm; 5616 PetscSF sf, osf; 5617 PetscBool has; 5618 5619 PetscFunctionBegin; 5620 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5621 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5622 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5623 * and then create a submatrix (that often is an overlapping matrix) 5624 * */ 5625 if (reuse == MAT_INITIAL_MATRIX) { 5626 /* Use a hash table to figure out unique keys */ 5627 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5628 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5629 count = 0; 5630 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5631 for (i = 0; i < a->B->cmap->n; i++) { 5632 key = a->garray[i] / dof; 5633 PetscCall(PetscHMapIHas(hamp, key, &has)); 5634 if (!has) { 5635 mapping[i] = count; 5636 PetscCall(PetscHMapISet(hamp, key, count++)); 5637 } else { 5638 /* Current 'i' has the same value the previous step */ 5639 mapping[i] = count - 1; 5640 } 5641 } 5642 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5643 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5644 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5645 PetscCall(PetscCalloc1(htsize, &rowindices)); 5646 off = 0; 5647 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5648 PetscCall(PetscHMapIDestroy(&hamp)); 5649 PetscCall(PetscSortInt(htsize, rowindices)); 5650 PetscCall(ISCreateGeneral(comm, htsize, 
rowindices, PETSC_OWN_POINTER, &rows)); 5651 /* In case, the matrix was already created but users want to recreate the matrix */ 5652 PetscCall(MatDestroy(P_oth)); 5653 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5654 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5655 PetscCall(ISDestroy(&map)); 5656 PetscCall(ISDestroy(&rows)); 5657 } else if (reuse == MAT_REUSE_MATRIX) { 5658 /* If matrix was already created, we simply update values using SF objects 5659 * that as attached to the matrix earlier. 5660 */ 5661 const PetscScalar *pd_a, *po_a; 5662 5663 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5664 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5665 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5666 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5667 /* Update values in place */ 5668 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5669 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5670 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5671 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5672 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5673 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5674 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5675 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5676 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5677 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5678 PetscFunctionReturn(PETSC_SUCCESS); 5679 } 5680 5681 /*@C 5682 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5683 5684 Collective 5685 5686 Input Parameters: 5687 + A - the first matrix in `MATMPIAIJ` format 5688 . B - the second matrix in `MATMPIAIJ` format 5689 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5690 5691 Output Parameters: 5692 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5693 . 
colb - On input, the index set of columns of `B` to extract (or `NULL`); modified on output 5694 - B_seq - the sequential matrix generated 5695 5696 Level: developer 5697 5698 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5699 @*/ 5700 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5701 { 5702 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5703 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5704 IS isrowb, iscolb; 5705 Mat *bseq = NULL; 5706 5707 PetscFunctionBegin; 5708 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5709 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5710 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5711 5712 if (scall == MAT_INITIAL_MATRIX) { 5713 start = A->cmap->rstart; 5714 cmap = a->garray; 5715 nzA = a->A->cmap->n; 5716 nzB = a->B->cmap->n; 5717 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5718 ncols = 0; 5719 for (i = 0; i < nzB; i++) { /* row < local row index */ 5720 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5721 else break; 5722 } 5723 imark = i; 5724 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5725 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5726 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5727 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5728 } else { 5729 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5730 isrowb = *rowb; 5731 iscolb = *colb; 5732 PetscCall(PetscMalloc1(1, &bseq)); 5733 bseq[0] = *B_seq; 5734 } 5735 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5736 *B_seq = bseq[0]; 5737 PetscCall(PetscFree(bseq)); 5738 if (!rowb) { 5739 PetscCall(ISDestroy(&isrowb)); 5740 } else { 5741 *rowb = isrowb; 5742 } 5743 if (!colb) { 5744 PetscCall(ISDestroy(&iscolb)); 5745 } else { 5746 *colb = iscolb; 5747 } 5748 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5749 PetscFunctionReturn(PETSC_SUCCESS); 5750 } 5751 5752 /* 5753 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns 5754 of the OFF-DIAGONAL portion of local A 5755 5756 Collective 5757 5758 Input Parameters: 5759 + A,B - the matrices in `MATMPIAIJ` format 5760 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5761 5762 Output Parameters: 5763 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5764 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5765 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5766 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5767 5768 Developer Note: 5769 This directly accesses information inside the VecScatter associated with the matrix-vector product 5770 for this matrix. This is not desirable.
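   Example Usage:
   An illustrative calling sketch only (not taken from a PETSc example); it assumes the usual protocol in which the
   arrays produced by the MAT_INITIAL_MATRIX call are passed back unchanged when reusing the matrix:
.vb
   PetscInt  *startsj_s = NULL, *startsj_r = NULL;
   MatScalar *bufa      = NULL;
   Mat        B_oth     = NULL;

   // first call builds B_oth plus the communication metadata saved in startsj_s, startsj_r, and bufa
   PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
   // later, after the numerical values of B change (same nonzero pattern), refresh B_oth cheaply
   PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
.ve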
5771 5772 Level: developer 5773 5774 */ 5775 5776 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5777 { 5778 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5779 VecScatter ctx; 5780 MPI_Comm comm; 5781 const PetscMPIInt *rprocs, *sprocs; 5782 PetscMPIInt nrecvs, nsends; 5783 const PetscInt *srow, *rstarts, *sstarts; 5784 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5785 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5786 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5787 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5788 PetscMPIInt size, tag, rank, nreqs; 5789 5790 PetscFunctionBegin; 5791 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5792 PetscCallMPI(MPI_Comm_size(comm, &size)); 5793 5794 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5795 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5796 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5797 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5798 5799 if (size == 1) { 5800 startsj_s = NULL; 5801 bufa_ptr = NULL; 5802 *B_oth = NULL; 5803 PetscFunctionReturn(PETSC_SUCCESS); 5804 } 5805 5806 ctx = a->Mvctx; 5807 tag = ((PetscObject)ctx)->tag; 5808 5809 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5810 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5811 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5812 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5813 PetscCall(PetscMalloc1(nreqs, &reqs)); 5814 rwaits = reqs; 5815 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5816 5817 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5818 if (scall == MAT_INITIAL_MATRIX) { 5819 /* i-array */ 5820 /* post receives */ 5821 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5822 for (i = 0; i < nrecvs; i++) { 5823 rowlen = rvalues + rstarts[i] * rbs; 5824 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5825 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5826 } 5827 5828 /* pack the outgoing message */ 5829 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5830 5831 sstartsj[0] = 0; 5832 rstartsj[0] = 0; 5833 len = 0; /* total length of j or a array to be sent */ 5834 if (nsends) { 5835 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5836 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5837 } 5838 for (i = 0; i < nsends; i++) { 5839 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5840 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5841 for (j = 0; j < nrows; j++) { 5842 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5843 for (l = 0; l < sbs; l++) { 5844 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5845 5846 rowlen[j * sbs + l] = ncols; 5847 5848 len += ncols; 5849 
PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5850 } 5851 k++; 5852 } 5853 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5854 5855 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5856 } 5857 /* recvs and sends of i-array are completed */ 5858 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5859 PetscCall(PetscFree(svalues)); 5860 5861 /* allocate buffers for sending j and a arrays */ 5862 PetscCall(PetscMalloc1(len + 1, &bufj)); 5863 PetscCall(PetscMalloc1(len + 1, &bufa)); 5864 5865 /* create i-array of B_oth */ 5866 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5867 5868 b_othi[0] = 0; 5869 len = 0; /* total length of j or a array to be received */ 5870 k = 0; 5871 for (i = 0; i < nrecvs; i++) { 5872 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5873 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5874 for (j = 0; j < nrows; j++) { 5875 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5876 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5877 k++; 5878 } 5879 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5880 } 5881 PetscCall(PetscFree(rvalues)); 5882 5883 /* allocate space for j and a arrays of B_oth */ 5884 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5885 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5886 5887 /* j-array */ 5888 /* post receives of j-array */ 5889 for (i = 0; i < nrecvs; i++) { 5890 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5891 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5892 } 5893 5894 /* pack the outgoing message j-array */ 5895 if (nsends) k = sstarts[0]; 5896 for (i = 0; i < nsends; i++) { 5897 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5898 bufJ = bufj + sstartsj[i]; 5899 for (j = 0; j < nrows; j++) { 5900 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5901 for (ll = 0; ll < sbs; ll++) { 5902 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5903 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5904 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5905 } 5906 } 5907 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5908 } 5909 5910 /* recvs and sends of j-array are completed */ 5911 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5912 } else if (scall == MAT_REUSE_MATRIX) { 5913 sstartsj = *startsj_s; 5914 rstartsj = *startsj_r; 5915 bufa = *bufa_ptr; 5916 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5917 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5918 5919 /* a-array */ 5920 /* post receives of a-array */ 5921 for (i = 0; i < nrecvs; i++) { 5922 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5923 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5924 } 5925 5926 /* pack the outgoing message a-array */ 5927 if (nsends) k = sstarts[0]; 5928 for (i = 0; i < nsends; i++) { 5929 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5930 bufA = bufa + sstartsj[i]; 5931 for (j = 0; j < nrows; j++) { 5932 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5933 for (ll = 0; ll < sbs; ll++) { 5934 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5935 for (l = 
0; l < ncols; l++) *bufA++ = vals[l]; 5936 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5937 } 5938 } 5939 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5940 } 5941 /* recvs and sends of a-array are completed */ 5942 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5943 PetscCall(PetscFree(reqs)); 5944 5945 if (scall == MAT_INITIAL_MATRIX) { 5946 Mat_SeqAIJ *b_oth; 5947 5948 /* put together the new matrix */ 5949 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5950 5951 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5952 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5953 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5954 b_oth->free_a = PETSC_TRUE; 5955 b_oth->free_ij = PETSC_TRUE; 5956 b_oth->nonew = 0; 5957 5958 PetscCall(PetscFree(bufj)); 5959 if (!startsj_s || !bufa_ptr) { 5960 PetscCall(PetscFree2(sstartsj, rstartsj)); 5961 PetscCall(PetscFree(bufa_ptr)); 5962 } else { 5963 *startsj_s = sstartsj; 5964 *startsj_r = rstartsj; 5965 *bufa_ptr = bufa; 5966 } 5967 } else if (scall == MAT_REUSE_MATRIX) { 5968 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5969 } 5970 5971 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5972 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5973 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5974 PetscFunctionReturn(PETSC_SUCCESS); 5975 } 5976 5977 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5978 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5979 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5980 #if defined(PETSC_HAVE_MKL_SPARSE) 5981 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5982 #endif 5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5985 #if defined(PETSC_HAVE_ELEMENTAL) 5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5987 #endif 5988 #if defined(PETSC_HAVE_SCALAPACK) 5989 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5990 #endif 5991 #if defined(PETSC_HAVE_HYPRE) 5992 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 5993 #endif 5994 #if defined(PETSC_HAVE_CUDA) 5995 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 5996 #endif 5997 #if defined(PETSC_HAVE_HIP) 5998 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 5999 #endif 6000 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6001 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6002 #endif 6003 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6004 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6005 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6006 6007 /* 6008 Computes (B'*A')' since computing B*A directly is untenable 6009 6010 n p p 6011 [ ] [ ] [ ] 6012 m [ A ] * n [ B ] = m [ C ] 6013 [ ] [ ] [ ] 6014 
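   In equation form, the identity used by the routine below is C = A*B = (B^T * A^T)^T: A and B are explicitly transposed,
   the product B^T * A^T (an MPIAIJ times MPIDense product, which is supported) is formed with MatMatMult(), and the
   result is transposed back into C.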
6015 */ 6016 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6017 { 6018 Mat At, Bt, Ct; 6019 6020 PetscFunctionBegin; 6021 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6022 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6023 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6024 PetscCall(MatDestroy(&At)); 6025 PetscCall(MatDestroy(&Bt)); 6026 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6027 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6028 PetscCall(MatDestroy(&Ct)); 6029 PetscFunctionReturn(PETSC_SUCCESS); 6030 } 6031 6032 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6033 { 6034 PetscBool cisdense; 6035 6036 PetscFunctionBegin; 6037 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6038 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6039 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6040 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6041 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6042 PetscCall(MatSetUp(C)); 6043 6044 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6045 PetscFunctionReturn(PETSC_SUCCESS); 6046 } 6047 6048 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6049 { 6050 Mat_Product *product = C->product; 6051 Mat A = product->A, B = product->B; 6052 6053 PetscFunctionBegin; 6054 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6055 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6056 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6057 C->ops->productsymbolic = MatProductSymbolic_AB; 6058 PetscFunctionReturn(PETSC_SUCCESS); 6059 } 6060 6061 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6062 { 6063 Mat_Product *product = C->product; 6064 6065 PetscFunctionBegin; 6066 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6067 PetscFunctionReturn(PETSC_SUCCESS); 6068 } 6069 6070 /* 6071 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6072 6073 Input Parameters: 6074 6075 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6076 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6077 6078 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6079 6080 For Set1, j1[] contains column indices of the nonzeros. 6081 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6082 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6083 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6084 6085 Similarly for Set2. 6086 6087 This routine merges the two sets of nonzeros row by row and removes repeats. 6088 6089 Output Parameters: (memory is allocated by the caller) 6090 6091 i[],j[]: the CSR of the merged matrix, which has m rows. 6092 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...)
corresponds to imap1[k]-th unique nonzero in the merged matrix. 6093 imap2[]: similar to imap1[], but for Set2. 6094 Note we order nonzeros row-by-row and from left to right. 6095 */ 6096 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6097 { 6098 PetscInt r, m; /* Row index of mat */ 6099 PetscCount t, t1, t2, b1, e1, b2, e2; 6100 6101 PetscFunctionBegin; 6102 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6103 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6104 i[0] = 0; 6105 for (r = 0; r < m; r++) { /* Do row by row merging */ 6106 b1 = rowBegin1[r]; 6107 e1 = rowEnd1[r]; 6108 b2 = rowBegin2[r]; 6109 e2 = rowEnd2[r]; 6110 while (b1 < e1 && b2 < e2) { 6111 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6112 j[t] = j1[b1]; 6113 imap1[t1] = t; 6114 imap2[t2] = t; 6115 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6116 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6117 t1++; 6118 t2++; 6119 t++; 6120 } else if (j1[b1] < j2[b2]) { 6121 j[t] = j1[b1]; 6122 imap1[t1] = t; 6123 b1 += jmap1[t1 + 1] - jmap1[t1]; 6124 t1++; 6125 t++; 6126 } else { 6127 j[t] = j2[b2]; 6128 imap2[t2] = t; 6129 b2 += jmap2[t2 + 1] - jmap2[t2]; 6130 t2++; 6131 t++; 6132 } 6133 } 6134 /* Merge the remaining in either j1[] or j2[] */ 6135 while (b1 < e1) { 6136 j[t] = j1[b1]; 6137 imap1[t1] = t; 6138 b1 += jmap1[t1 + 1] - jmap1[t1]; 6139 t1++; 6140 t++; 6141 } 6142 while (b2 < e2) { 6143 j[t] = j2[b2]; 6144 imap2[t2] = t; 6145 b2 += jmap2[t2 + 1] - jmap2[t2]; 6146 t2++; 6147 t++; 6148 } 6149 PetscCall(PetscIntCast(t, i + r + 1)); 6150 } 6151 PetscFunctionReturn(PETSC_SUCCESS); 6152 } 6153 6154 /* 6155 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6156 6157 Input Parameters: 6158 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6159 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6160 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6161 6162 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6163 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6164 6165 Output Parameters: 6166 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6167 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6168 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6169 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6170 6171 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6172 Atot: number of entries belonging to the diagonal block. 6173 Annz: number of unique nonzeros belonging to the diagonal block. 6174 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. 
Length of Aperm[] is Atot, though it may also count 6175 repeats (i.e., same 'i,j' pair). 6176 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6177 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6178 6179 Atot: number of entries belonging to the diagonal block 6180 Annz: number of unique nonzeros belonging to the diagonal block. 6181 6182 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6183 6184 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6185 */ 6186 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6187 { 6188 PetscInt cstart, cend, rstart, rend, row, col; 6189 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6190 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6191 PetscCount k, m, p, q, r, s, mid; 6192 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6193 6194 PetscFunctionBegin; 6195 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6196 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6197 m = rend - rstart; 6198 6199 /* Skip negative rows */ 6200 for (k = 0; k < n; k++) 6201 if (i[k] >= 0) break; 6202 6203 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6204 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6205 */ 6206 while (k < n) { 6207 row = i[k]; 6208 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6209 for (s = k; s < n; s++) 6210 if (i[s] != row) break; 6211 6212 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6213 for (p = k; p < s; p++) { 6214 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6215 } 6216 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6217 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6218 rowBegin[row - rstart] = k; 6219 rowMid[row - rstart] = mid; 6220 rowEnd[row - rstart] = s; 6221 PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N); 6222 6223 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6224 Atot += mid - k; 6225 Btot += s - mid; 6226 6227 /* Count unique nonzeros of this diag row */ 6228 for (p = k; p < mid;) { 6229 col = j[p]; 6230 do { 6231 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6232 p++; 6233 } while (p < mid && j[p] == col); 6234 Annz++; 6235 } 6236 6237 /* Count unique nonzeros of this offdiag row */ 6238 for (p = mid; p < s;) { 6239 col = j[p]; 6240 do { 6241 p++; 6242 } while (p < s && j[p] == col); 6243 Bnnz++; 6244 } 6245 k = s; 6246 } 6247 6248 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6249 PetscCall(PetscMalloc1(Atot, &Aperm)); 6250 PetscCall(PetscMalloc1(Btot, &Bperm)); 6251 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6252 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6253 6254 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6255 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6256 for (r = 0; r < m; r++) { 6257 k = rowBegin[r]; 6258 mid = rowMid[r]; 6259 s = rowEnd[r]; 6260 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6261 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6262 Atot += mid - k; 6263 Btot += s - mid; 6264 6265 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6266 for (p = k; p < mid;) { 6267 col = j[p]; 6268 q = p; 6269 do { 6270 p++; 6271 } while (p < mid && j[p] == col); 6272 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6273 Annz++; 6274 } 6275 6276 for (p = mid; p < s;) { 6277 col = j[p]; 6278 q = p; 6279 do { 6280 p++; 6281 } while (p < s && j[p] == col); 6282 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6283 Bnnz++; 6284 } 6285 } 6286 /* Output */ 6287 *Aperm_ = Aperm; 6288 *Annz_ = Annz; 6289 *Atot_ = Atot; 6290 *Ajmap_ = Ajmap; 6291 *Bperm_ = Bperm; 6292 *Bnnz_ = Bnnz; 6293 *Btot_ = Btot; 6294 *Bjmap_ = Bjmap; 6295 PetscFunctionReturn(PETSC_SUCCESS); 6296 } 6297 6298 /* 6299 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6300 6301 Input Parameters: 6302 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6303 nnz: number of unique nonzeros in the merged matrix 6304 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6305 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6306 6307 Output Parameter: (memory is allocated by the caller) 6308 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6309 6310 Example: 6311 nnz1 = 4 6312 nnz 
= 6 6313 imap = [1,3,4,5] 6314 jmap = [0,3,5,6,7] 6315 then, 6316 jmap_new = [0,0,3,3,5,6,7] 6317 */ 6318 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6319 { 6320 PetscCount k, p; 6321 6322 PetscFunctionBegin; 6323 jmap_new[0] = 0; 6324 p = nnz; /* p loops over jmap_new[] backwards */ 6325 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6326 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6327 } 6328 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6329 PetscFunctionReturn(PETSC_SUCCESS); 6330 } 6331 6332 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6333 { 6334 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6335 6336 PetscFunctionBegin; 6337 PetscCall(PetscSFDestroy(&coo->sf)); 6338 PetscCall(PetscFree(coo->Aperm1)); 6339 PetscCall(PetscFree(coo->Bperm1)); 6340 PetscCall(PetscFree(coo->Ajmap1)); 6341 PetscCall(PetscFree(coo->Bjmap1)); 6342 PetscCall(PetscFree(coo->Aimap2)); 6343 PetscCall(PetscFree(coo->Bimap2)); 6344 PetscCall(PetscFree(coo->Aperm2)); 6345 PetscCall(PetscFree(coo->Bperm2)); 6346 PetscCall(PetscFree(coo->Ajmap2)); 6347 PetscCall(PetscFree(coo->Bjmap2)); 6348 PetscCall(PetscFree(coo->Cperm1)); 6349 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6350 PetscCall(PetscFree(coo)); 6351 PetscFunctionReturn(PETSC_SUCCESS); 6352 } 6353 6354 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6355 { 6356 MPI_Comm comm; 6357 PetscMPIInt rank, size; 6358 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6359 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6360 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6361 PetscContainer container; 6362 MatCOOStruct_MPIAIJ *coo; 6363 6364 PetscFunctionBegin; 6365 PetscCall(PetscFree(mpiaij->garray)); 6366 PetscCall(VecDestroy(&mpiaij->lvec)); 6367 #if defined(PETSC_USE_CTABLE) 6368 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6369 #else 6370 PetscCall(PetscFree(mpiaij->colmap)); 6371 #endif 6372 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6373 mat->assembled = PETSC_FALSE; 6374 mat->was_assembled = PETSC_FALSE; 6375 6376 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6377 PetscCallMPI(MPI_Comm_size(comm, &size)); 6378 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6379 PetscCall(PetscLayoutSetUp(mat->rmap)); 6380 PetscCall(PetscLayoutSetUp(mat->cmap)); 6381 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6382 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6383 PetscCall(MatGetLocalSize(mat, &m, &n)); 6384 PetscCall(MatGetSize(mat, &M, &N)); 6385 6386 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6387 /* entries come first, then local rows, then remote rows. */ 6388 PetscCount n1 = coo_n, *perm1; 6389 PetscInt *i1 = coo_i, *j1 = coo_j; 6390 6391 PetscCall(PetscMalloc1(n1, &perm1)); 6392 for (k = 0; k < n1; k++) perm1[k] = k; 6393 6394 /* Manipulate indices so that entries with negative row or col indices will have smallest 6395 row indices, local entries will have greater but negative row indices, and remote entries 6396 will have positive row indices. 
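   For example (an illustrative sketch, not from the source): with rstart = 10 and rend = 20, an entry with
   i1[k] = -1 becomes PETSC_INT_MIN, a local row i1[k] = 12 becomes 12 - PETSC_INT_MAX (a large negative number),
   and a remote row i1[k] = 35 is left at 35, so a single sort by row index groups ignored, local and remote entries
   in exactly that order.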
6397 */ 6398 for (k = 0; k < n1; k++) { 6399 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6400 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6401 else { 6402 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6403 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6404 } 6405 } 6406 6407 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6408 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6409 6410 /* Advance k to the first entry we need to take care of */ 6411 for (k = 0; k < n1; k++) 6412 if (i1[k] > PETSC_INT_MIN) break; 6413 PetscCount i1start = k; 6414 6415 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6416 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6417 6418 PetscCheck(i1 == NULL || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M); 6419 6420 /* Send remote rows to their owner */ 6421 /* Find which rows should be sent to which remote ranks*/ 6422 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6423 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6424 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6425 const PetscInt *ranges; 6426 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6427 6428 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6429 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6430 for (k = rem; k < n1;) { 6431 PetscMPIInt owner; 6432 PetscInt firstRow, lastRow; 6433 6434 /* Locate a row range */ 6435 firstRow = i1[k]; /* first row of this owner */ 6436 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6437 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6438 6439 /* Find the first index 'p' in [k,n) with i1[p] belonging to next owner */ 6440 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6441 6442 /* All entries in [k,p) belong to this remote owner */ 6443 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6444 PetscMPIInt *sendto2; 6445 PetscInt *nentries2; 6446 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6447 6448 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6449 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6450 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6451 PetscCall(PetscFree2(sendto, nentries)); 6452 sendto = sendto2; 6453 nentries = nentries2; 6454 maxNsend = maxNsend2; 6455 } 6456 sendto[nsend] = owner; 6457 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6458 nsend++; 6459 k = p; 6460 } 6461 6462 /* Build 1st SF to know offsets on remote to send data */ 6463 PetscSF sf1; 6464 PetscInt nroots = 1, nroots2 = 0; 6465 PetscInt nleaves = nsend, nleaves2 = 0; 6466 PetscInt *offsets; 6467 PetscSFNode *iremote; 6468 6469 PetscCall(PetscSFCreate(comm, &sf1)); 6470 PetscCall(PetscMalloc1(nsend, &iremote)); 6471 PetscCall(PetscMalloc1(nsend, &offsets)); 6472 for (k = 0; k < nsend; k++) { 6473 iremote[k].rank = sendto[k]; 6474 iremote[k].index = 0; 6475 nleaves2 += nentries[k]; 6476 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6477 } 6478 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6479 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6480 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */ 6481 PetscCall(PetscSFDestroy(&sf1)); 6482 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6483 6484 /* Build 2nd SF to send remote COOs to their owner */ 6485 PetscSF sf2; 6486 nroots = nroots2; 6487 nleaves = nleaves2; 6488 PetscCall(PetscSFCreate(comm, &sf2)); 6489 PetscCall(PetscSFSetFromOptions(sf2)); 6490 PetscCall(PetscMalloc1(nleaves, &iremote)); 6491 p = 0; 6492 for (k = 0; k < nsend; k++) { 6493 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6494 for (q = 0; q < nentries[k]; q++, p++) { 6495 iremote[p].rank = sendto[k]; 6496 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6497 } 6498 } 6499 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6500 6501 /* Send the remote COOs to their owner */ 6502 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6503 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6504 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6505 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6506 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6507 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6508 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6509 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6510 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6511 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6512 PetscCall(PetscSFReduceEnd(sf2,
MPIU_INT, j1prem, j2, MPI_REPLACE)); 6513 6514 PetscCall(PetscFree(offsets)); 6515 PetscCall(PetscFree2(sendto, nentries)); 6516 6517 /* Sort received COOs by row along with the permutation array */ 6518 for (k = 0; k < n2; k++) perm2[k] = k; 6519 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6520 6521 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6522 PetscCount *Cperm1; 6523 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6524 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6525 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6526 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6527 6528 /* Support for HYPRE matrices, kind of a hack. 6529 Swap min column with diagonal so that diagonal values will go first */ 6530 PetscBool hypre; 6531 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6532 if (hypre) { 6533 PetscInt *minj; 6534 PetscBT hasdiag; 6535 6536 PetscCall(PetscBTCreate(m, &hasdiag)); 6537 PetscCall(PetscMalloc1(m, &minj)); 6538 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6539 for (k = i1start; k < rem; k++) { 6540 if (j1[k] < cstart || j1[k] >= cend) continue; 6541 const PetscInt rindex = i1[k] - rstart; 6542 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6543 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6544 } 6545 for (k = 0; k < n2; k++) { 6546 if (j2[k] < cstart || j2[k] >= cend) continue; 6547 const PetscInt rindex = i2[k] - rstart; 6548 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6549 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6550 } 6551 for (k = i1start; k < rem; k++) { 6552 const PetscInt rindex = i1[k] - rstart; 6553 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6554 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6555 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6556 } 6557 for (k = 0; k < n2; k++) { 6558 const PetscInt rindex = i2[k] - rstart; 6559 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6560 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6561 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6562 } 6563 PetscCall(PetscBTDestroy(&hasdiag)); 6564 PetscCall(PetscFree(minj)); 6565 } 6566 6567 /* Split local COOs and received COOs into diag/offdiag portions */ 6568 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6569 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6570 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6571 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6572 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6573 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6574 6575 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6576 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6577 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6578 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6579 6580 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6581 PetscInt *Ai, *Bi; 6582 PetscInt *Aj, *Bj; 6583 6584 PetscCall(PetscMalloc1(m + 1, &Ai)); 6585 PetscCall(PetscMalloc1(m + 1, &Bi)); 6586 PetscCall(PetscMalloc1(Annz1 + 
Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6587 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6588 6589 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6590 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6591 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6592 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6593 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6594 6595 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6596 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6597 6598 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6599 /* expect nonzeros in A/B most likely have local contributing entries */ 6600 PetscInt Annz = Ai[m]; 6601 PetscInt Bnnz = Bi[m]; 6602 PetscCount *Ajmap1_new, *Bjmap1_new; 6603 6604 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6605 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6606 6607 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6608 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6609 6610 PetscCall(PetscFree(Aimap1)); 6611 PetscCall(PetscFree(Ajmap1)); 6612 PetscCall(PetscFree(Bimap1)); 6613 PetscCall(PetscFree(Bjmap1)); 6614 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6615 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6616 PetscCall(PetscFree(perm1)); 6617 PetscCall(PetscFree3(i2, j2, perm2)); 6618 6619 Ajmap1 = Ajmap1_new; 6620 Bjmap1 = Bjmap1_new; 6621 6622 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6623 if (Annz < Annz1 + Annz2) { 6624 PetscInt *Aj_new; 6625 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6626 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6627 PetscCall(PetscFree(Aj)); 6628 Aj = Aj_new; 6629 } 6630 6631 if (Bnnz < Bnnz1 + Bnnz2) { 6632 PetscInt *Bj_new; 6633 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6634 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6635 PetscCall(PetscFree(Bj)); 6636 Bj = Bj_new; 6637 } 6638 6639 /* Create new submatrices for on-process and off-process coupling */ 6640 PetscScalar *Aa, *Ba; 6641 MatType rtype; 6642 Mat_SeqAIJ *a, *b; 6643 PetscObjectState state; 6644 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6645 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6646 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6647 if (cstart) { 6648 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6649 } 6650 6651 PetscCall(MatGetRootType_Private(mat, &rtype)); 6652 6653 MatSeqXAIJGetOptions_Private(mpiaij->A); 6654 PetscCall(MatDestroy(&mpiaij->A)); 6655 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6656 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6657 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6658 6659 MatSeqXAIJGetOptions_Private(mpiaij->B); 6660 PetscCall(MatDestroy(&mpiaij->B)); 6661 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6662 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6663 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6664 6665 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6666 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6667 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6668 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, 
PetscObjectComm((PetscObject)mat))); 6669 6670 a = (Mat_SeqAIJ *)mpiaij->A->data; 6671 b = (Mat_SeqAIJ *)mpiaij->B->data; 6672 a->free_a = PETSC_TRUE; 6673 a->free_ij = PETSC_TRUE; 6674 b->free_a = PETSC_TRUE; 6675 b->free_ij = PETSC_TRUE; 6676 a->maxnz = a->nz; 6677 b->maxnz = b->nz; 6678 6679 /* conversion must happen AFTER multiply setup */ 6680 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6681 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6682 PetscCall(VecDestroy(&mpiaij->lvec)); 6683 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6684 6685 // Put the COO struct in a container and then attach that to the matrix 6686 PetscCall(PetscMalloc1(1, &coo)); 6687 coo->n = coo_n; 6688 coo->sf = sf2; 6689 coo->sendlen = nleaves; 6690 coo->recvlen = nroots; 6691 coo->Annz = Annz; 6692 coo->Bnnz = Bnnz; 6693 coo->Annz2 = Annz2; 6694 coo->Bnnz2 = Bnnz2; 6695 coo->Atot1 = Atot1; 6696 coo->Atot2 = Atot2; 6697 coo->Btot1 = Btot1; 6698 coo->Btot2 = Btot2; 6699 coo->Ajmap1 = Ajmap1; 6700 coo->Aperm1 = Aperm1; 6701 coo->Bjmap1 = Bjmap1; 6702 coo->Bperm1 = Bperm1; 6703 coo->Aimap2 = Aimap2; 6704 coo->Ajmap2 = Ajmap2; 6705 coo->Aperm2 = Aperm2; 6706 coo->Bimap2 = Bimap2; 6707 coo->Bjmap2 = Bjmap2; 6708 coo->Bperm2 = Bperm2; 6709 coo->Cperm1 = Cperm1; 6710 // Allocate in preallocation. If not used, it has zero cost on host 6711 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6712 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6713 PetscCall(PetscContainerSetPointer(container, coo)); 6714 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6715 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6716 PetscCall(PetscContainerDestroy(&container)); 6717 PetscFunctionReturn(PETSC_SUCCESS); 6718 } 6719 6720 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6721 { 6722 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6723 Mat A = mpiaij->A, B = mpiaij->B; 6724 PetscScalar *Aa, *Ba; 6725 PetscScalar *sendbuf, *recvbuf; 6726 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6727 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6728 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6729 const PetscCount *Cperm1; 6730 PetscContainer container; 6731 MatCOOStruct_MPIAIJ *coo; 6732 6733 PetscFunctionBegin; 6734 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6735 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6736 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6737 sendbuf = coo->sendbuf; 6738 recvbuf = coo->recvbuf; 6739 Ajmap1 = coo->Ajmap1; 6740 Ajmap2 = coo->Ajmap2; 6741 Aimap2 = coo->Aimap2; 6742 Bjmap1 = coo->Bjmap1; 6743 Bjmap2 = coo->Bjmap2; 6744 Bimap2 = coo->Bimap2; 6745 Aperm1 = coo->Aperm1; 6746 Aperm2 = coo->Aperm2; 6747 Bperm1 = coo->Bperm1; 6748 Bperm2 = coo->Bperm2; 6749 Cperm1 = coo->Cperm1; 6750 6751 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6752 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6753 6754 /* Pack entries to be sent to remote */ 6755 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6756 6757 /* Send remote entries to their owner and overlap the communication with local computation */ 6758 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, 
PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6759 /* Add local entries to A and B */ 6760 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6761 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6762 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6763 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6764 } 6765 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6766 PetscScalar sum = 0.0; 6767 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6768 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6769 } 6770 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6771 6772 /* Add received remote entries to A and B */ 6773 for (PetscCount i = 0; i < coo->Annz2; i++) { 6774 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6775 } 6776 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6777 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6778 } 6779 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6780 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6781 PetscFunctionReturn(PETSC_SUCCESS); 6782 } 6783 6784 /*MC 6785 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6786 6787 Options Database Keys: 6788 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6789 6790 Level: beginner 6791 6792 Notes: 6793 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6794 in this case the values associated with the rows and columns one passes in are set to zero 6795 in the matrix 6796 6797 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 6798 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6799 6800 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6801 M*/ 6802 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6803 { 6804 Mat_MPIAIJ *b; 6805 PetscMPIInt size; 6806 6807 PetscFunctionBegin; 6808 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6809 6810 PetscCall(PetscNew(&b)); 6811 B->data = (void *)b; 6812 B->ops[0] = MatOps_Values; 6813 B->assembled = PETSC_FALSE; 6814 B->insertmode = NOT_SET_VALUES; 6815 b->size = size; 6816 6817 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6818 6819 /* build cache for off array entries formed */ 6820 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6821 6822 b->donotstash = PETSC_FALSE; 6823 b->colmap = NULL; 6824 b->garray = NULL; 6825 b->roworiented = PETSC_TRUE; 6826 6827 /* stuff used for matrix vector multiply */ 6828 b->lvec = NULL; 6829 b->Mvctx = NULL; 6830 6831 /* stuff for MatGetRow() */ 6832 b->rowindices = NULL; 6833 b->rowvalues = NULL; 6834 b->getrowactive = PETSC_FALSE; 6835 6836 /* flexible pointer used in CUSPARSE classes */ 6837 b->spptr = NULL; 6838 6839 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6840 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6842 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6844 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6845 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6847 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6848 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6849 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6850 #if defined(PETSC_HAVE_CUDA) 6851 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6852 #endif 6853 #if defined(PETSC_HAVE_HIP) 6854 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6855 #endif 6856 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6857 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6858 #endif 6859 #if defined(PETSC_HAVE_MKL_SPARSE) 6860 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6861 #endif 6862 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6863 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6864 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6865 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6866 #if defined(PETSC_HAVE_ELEMENTAL) 6867 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6868 #endif 6869 #if defined(PETSC_HAVE_SCALAPACK) 6870 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6871 #endif 6872 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6873 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6874 #if defined(PETSC_HAVE_HYPRE) 6875 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6876 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6877 #endif 6878 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6879 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6880 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6881 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6882 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6883 PetscFunctionReturn(PETSC_SUCCESS); 6884 } 6885 6886 /*@ 6887 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6888 and "off-diagonal" part of the matrix in CSR format. 6889 6890 Collective 6891 6892 Input Parameters: 6893 + comm - MPI communicator 6894 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6895 . n - This value should be the same as the local size used in creating the 6896 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6897 calculated if `N` is given) For square matrices `n` is almost always `m`. 6898 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6899 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6900 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6901 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6902 . a - matrix values 6903 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6904 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6905 - oa - matrix values 6906 6907 Output Parameter: 6908 . mat - the matrix 6909 6910 Level: advanced 6911 6912 Notes: 6913 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6914 must free the arrays once the matrix has been destroyed and not before. 
6915 6916 The `i` and `j` indices are 0 based 6917 6918 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6919 6920 This sets local rows and cannot be used to set off-processor values. 6921 6922 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6923 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6924 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6925 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6926 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6927 communication if it is known that only local entries will be set. 6928 6929 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6930 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6931 @*/ 6932 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6933 { 6934 Mat_MPIAIJ *maij; 6935 6936 PetscFunctionBegin; 6937 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6938 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6939 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6940 PetscCall(MatCreate(comm, mat)); 6941 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6942 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6943 maij = (Mat_MPIAIJ *)(*mat)->data; 6944 6945 (*mat)->preallocated = PETSC_TRUE; 6946 6947 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6948 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6949 6950 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6951 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6952 6953 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6954 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6955 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6956 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6957 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6958 PetscFunctionReturn(PETSC_SUCCESS); 6959 } 6960 6961 typedef struct { 6962 Mat *mp; /* intermediate products */ 6963 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6964 PetscInt cp; /* number of intermediate products */ 6965 6966 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6967 PetscInt *startsj_s, *startsj_r; 6968 PetscScalar *bufa; 6969 Mat P_oth; 6970 6971 /* may take advantage of merging product->B */ 6972 Mat Bloc; /* B-local by merging diag and off-diag */ 6973 6974 /* cusparse does not have support to split between symbolic and numeric phases. 
6975 When api_user is true, we don't need to update the numerical values 6976 of the temporary storage */ 6977 PetscBool reusesym; 6978 6979 /* support for COO values insertion */ 6980 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6981 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6982 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6983 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6984 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6985 PetscMemType mtype; 6986 6987 /* customization */ 6988 PetscBool abmerge; 6989 PetscBool P_oth_bind; 6990 } MatMatMPIAIJBACKEND; 6991 6992 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6993 { 6994 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6995 PetscInt i; 6996 6997 PetscFunctionBegin; 6998 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6999 PetscCall(PetscFree(mmdata->bufa)); 7000 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7001 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7002 PetscCall(MatDestroy(&mmdata->P_oth)); 7003 PetscCall(MatDestroy(&mmdata->Bloc)); 7004 PetscCall(PetscSFDestroy(&mmdata->sf)); 7005 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7006 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7007 PetscCall(PetscFree(mmdata->own[0])); 7008 PetscCall(PetscFree(mmdata->own)); 7009 PetscCall(PetscFree(mmdata->off[0])); 7010 PetscCall(PetscFree(mmdata->off)); 7011 PetscCall(PetscFree(mmdata)); 7012 PetscFunctionReturn(PETSC_SUCCESS); 7013 } 7014 7015 /* Copy selected n entries with indices in idx[] of A to v[]. 
7016 If idx is NULL, copy the whole data array of A to v[] 7017 */ 7018 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7019 { 7020 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7021 7022 PetscFunctionBegin; 7023 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7024 if (f) { 7025 PetscCall((*f)(A, n, idx, v)); 7026 } else { 7027 const PetscScalar *vv; 7028 7029 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7030 if (n && idx) { 7031 PetscScalar *w = v; 7032 const PetscInt *oi = idx; 7033 PetscInt j; 7034 7035 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7036 } else { 7037 PetscCall(PetscArraycpy(v, vv, n)); 7038 } 7039 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7040 } 7041 PetscFunctionReturn(PETSC_SUCCESS); 7042 } 7043 7044 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7045 { 7046 MatMatMPIAIJBACKEND *mmdata; 7047 PetscInt i, n_d, n_o; 7048 7049 PetscFunctionBegin; 7050 MatCheckProduct(C, 1); 7051 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7052 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7053 if (!mmdata->reusesym) { /* update temporary matrices */ 7054 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7055 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7056 } 7057 mmdata->reusesym = PETSC_FALSE; 7058 7059 for (i = 0; i < mmdata->cp; i++) { 7060 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7061 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7062 } 7063 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7064 PetscInt noff; 7065 7066 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7067 if (mmdata->mptmp[i]) continue; 7068 if (noff) { 7069 PetscInt nown; 7070 7071 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7072 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7073 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7074 n_o += noff; 7075 n_d += nown; 7076 } else { 7077 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7078 7079 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7080 n_d += mm->nz; 7081 } 7082 } 7083 if (mmdata->hasoffproc) { /* offprocess insertion */ 7084 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7085 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7086 } 7087 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7088 PetscFunctionReturn(PETSC_SUCCESS); 7089 } 7090 7091 /* Support for Pt * A, A * P, or Pt * A * P */ 7092 #define MAX_NUMBER_INTERMEDIATE 4 7093 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7094 { 7095 Mat_Product *product = C->product; 7096 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7097 Mat_MPIAIJ *a, *p; 7098 MatMatMPIAIJBACKEND *mmdata; 7099 ISLocalToGlobalMapping P_oth_l2g = NULL; 7100 IS glob = NULL; 7101 const char *prefix; 7102 char pprefix[256]; 7103 const PetscInt *globidx, *P_oth_idx; 7104 PetscInt i, j, cp, m, 
n, M, N, *coo_i, *coo_j; 7105 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7106 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7107 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7108 /* a base offset; type-2: sparse with a local to global map table */ 7109 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7110 7111 MatProductType ptype; 7112 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7113 PetscMPIInt size; 7114 7115 PetscFunctionBegin; 7116 MatCheckProduct(C, 1); 7117 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7118 ptype = product->type; 7119 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7120 ptype = MATPRODUCT_AB; 7121 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7122 } 7123 switch (ptype) { 7124 case MATPRODUCT_AB: 7125 A = product->A; 7126 P = product->B; 7127 m = A->rmap->n; 7128 n = P->cmap->n; 7129 M = A->rmap->N; 7130 N = P->cmap->N; 7131 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7132 break; 7133 case MATPRODUCT_AtB: 7134 P = product->A; 7135 A = product->B; 7136 m = P->cmap->n; 7137 n = A->cmap->n; 7138 M = P->cmap->N; 7139 N = A->cmap->N; 7140 hasoffproc = PETSC_TRUE; 7141 break; 7142 case MATPRODUCT_PtAP: 7143 A = product->A; 7144 P = product->B; 7145 m = P->cmap->n; 7146 n = P->cmap->n; 7147 M = P->cmap->N; 7148 N = P->cmap->N; 7149 hasoffproc = PETSC_TRUE; 7150 break; 7151 default: 7152 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7153 } 7154 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7155 if (size == 1) hasoffproc = PETSC_FALSE; 7156 7157 /* defaults */ 7158 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7159 mp[i] = NULL; 7160 mptmp[i] = PETSC_FALSE; 7161 rmapt[i] = -1; 7162 cmapt[i] = -1; 7163 rmapa[i] = NULL; 7164 cmapa[i] = NULL; 7165 } 7166 7167 /* customization */ 7168 PetscCall(PetscNew(&mmdata)); 7169 mmdata->reusesym = product->api_user; 7170 if (ptype == MATPRODUCT_AB) { 7171 if (product->api_user) { 7172 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7173 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7174 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7175 PetscOptionsEnd(); 7176 } else { 7177 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7178 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7179 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7180 PetscOptionsEnd(); 7181 } 7182 } else if (ptype == MATPRODUCT_PtAP) { 7183 if (product->api_user) { 7184 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7185 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, 
&mmdata->P_oth_bind, NULL)); 7186 PetscOptionsEnd(); 7187 } else { 7188 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7189 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7190 PetscOptionsEnd(); 7191 } 7192 } 7193 a = (Mat_MPIAIJ *)A->data; 7194 p = (Mat_MPIAIJ *)P->data; 7195 PetscCall(MatSetSizes(C, m, n, M, N)); 7196 PetscCall(PetscLayoutSetUp(C->rmap)); 7197 PetscCall(PetscLayoutSetUp(C->cmap)); 7198 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7199 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7200 7201 cp = 0; 7202 switch (ptype) { 7203 case MATPRODUCT_AB: /* A * P */ 7204 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7205 7206 /* A_diag * P_local (merged or not) */ 7207 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7208 /* P is product->B */ 7209 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7210 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7211 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7212 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7213 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7214 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7215 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7216 mp[cp]->product->api_user = product->api_user; 7217 PetscCall(MatProductSetFromOptions(mp[cp])); 7218 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7219 PetscCall(ISGetIndices(glob, &globidx)); 7220 rmapt[cp] = 1; 7221 cmapt[cp] = 2; 7222 cmapa[cp] = globidx; 7223 mptmp[cp] = PETSC_FALSE; 7224 cp++; 7225 } else { /* A_diag * P_diag and A_diag * P_off */ 7226 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7227 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7228 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7229 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7230 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7231 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7232 mp[cp]->product->api_user = product->api_user; 7233 PetscCall(MatProductSetFromOptions(mp[cp])); 7234 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7235 rmapt[cp] = 1; 7236 cmapt[cp] = 1; 7237 mptmp[cp] = PETSC_FALSE; 7238 cp++; 7239 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7240 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7241 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7242 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7243 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7244 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7245 mp[cp]->product->api_user = product->api_user; 7246 PetscCall(MatProductSetFromOptions(mp[cp])); 7247 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7248 rmapt[cp] = 1; 7249 cmapt[cp] = 2; 7250 cmapa[cp] = p->garray; 7251 mptmp[cp] = PETSC_FALSE; 7252 cp++; 7253 } 7254 7255 /* A_off * P_other */ 7256 if (mmdata->P_oth) { 7257 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7258 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7259 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7260 
PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7261 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7262 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7263 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7264 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7265 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7266 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7267 mp[cp]->product->api_user = product->api_user; 7268 PetscCall(MatProductSetFromOptions(mp[cp])); 7269 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7270 rmapt[cp] = 1; 7271 cmapt[cp] = 2; 7272 cmapa[cp] = P_oth_idx; 7273 mptmp[cp] = PETSC_FALSE; 7274 cp++; 7275 } 7276 break; 7277 7278 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7279 /* A is product->B */ 7280 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7281 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7282 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7283 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7284 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7285 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7286 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7287 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7288 mp[cp]->product->api_user = product->api_user; 7289 PetscCall(MatProductSetFromOptions(mp[cp])); 7290 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7291 PetscCall(ISGetIndices(glob, &globidx)); 7292 rmapt[cp] = 2; 7293 rmapa[cp] = globidx; 7294 cmapt[cp] = 2; 7295 cmapa[cp] = globidx; 7296 mptmp[cp] = PETSC_FALSE; 7297 cp++; 7298 } else { 7299 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7300 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7301 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7302 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7303 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7304 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7305 mp[cp]->product->api_user = product->api_user; 7306 PetscCall(MatProductSetFromOptions(mp[cp])); 7307 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7308 PetscCall(ISGetIndices(glob, &globidx)); 7309 rmapt[cp] = 1; 7310 cmapt[cp] = 2; 7311 cmapa[cp] = globidx; 7312 mptmp[cp] = PETSC_FALSE; 7313 cp++; 7314 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7315 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7316 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7317 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7318 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7319 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7320 mp[cp]->product->api_user = product->api_user; 7321 PetscCall(MatProductSetFromOptions(mp[cp])); 7322 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7323 rmapt[cp] = 2; 7324 rmapa[cp] = p->garray; 7325 cmapt[cp] = 2; 7326 cmapa[cp] = globidx; 7327 mptmp[cp] = PETSC_FALSE; 7328 cp++; 7329 } 7330 break; 7331 case MATPRODUCT_PtAP: 7332 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7333 /* P is product->B */ 7334 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7335 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7336 
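/* Bloc' * A_diag * Bloc: contribution of A's diagonal block; the off-diagonal block of A enters through the P_oth products below */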
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7337 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7338 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7339 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7340 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7341 mp[cp]->product->api_user = product->api_user; 7342 PetscCall(MatProductSetFromOptions(mp[cp])); 7343 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7344 PetscCall(ISGetIndices(glob, &globidx)); 7345 rmapt[cp] = 2; 7346 rmapa[cp] = globidx; 7347 cmapt[cp] = 2; 7348 cmapa[cp] = globidx; 7349 mptmp[cp] = PETSC_FALSE; 7350 cp++; 7351 if (mmdata->P_oth) { 7352 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7353 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7354 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7355 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7356 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7357 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7358 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7359 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7360 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7361 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7362 mp[cp]->product->api_user = product->api_user; 7363 PetscCall(MatProductSetFromOptions(mp[cp])); 7364 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7365 mptmp[cp] = PETSC_TRUE; 7366 cp++; 7367 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7368 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7369 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7370 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7371 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7372 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7373 mp[cp]->product->api_user = product->api_user; 7374 PetscCall(MatProductSetFromOptions(mp[cp])); 7375 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7376 rmapt[cp] = 2; 7377 rmapa[cp] = globidx; 7378 cmapt[cp] = 2; 7379 cmapa[cp] = P_oth_idx; 7380 mptmp[cp] = PETSC_FALSE; 7381 cp++; 7382 } 7383 break; 7384 default: 7385 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7386 } 7387 /* sanity check */ 7388 if (size > 1) 7389 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7390 7391 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7392 for (i = 0; i < cp; i++) { 7393 mmdata->mp[i] = mp[i]; 7394 mmdata->mptmp[i] = mptmp[i]; 7395 } 7396 mmdata->cp = cp; 7397 C->product->data = mmdata; 7398 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7399 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7400 7401 /* memory type */ 7402 mmdata->mtype = PETSC_MEMTYPE_HOST; 7403 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7404 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7405 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7406 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7407 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7408 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7409 7410 /* prepare coo 
coordinates for values insertion */ 7411 7412 /* count total nonzeros of those intermediate seqaij Mats 7413 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7414 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7415 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7416 */ 7417 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7418 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7419 if (mptmp[cp]) continue; 7420 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */ 7421 const PetscInt *rmap = rmapa[cp]; 7422 const PetscInt mr = mp[cp]->rmap->n; 7423 const PetscInt rs = C->rmap->rstart; 7424 const PetscInt re = C->rmap->rend; 7425 const PetscInt *ii = mm->i; 7426 for (i = 0; i < mr; i++) { 7427 const PetscInt gr = rmap[i]; 7428 const PetscInt nz = ii[i + 1] - ii[i]; 7429 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7430 else ncoo_oown += nz; /* this row is local */ 7431 } 7432 } else ncoo_d += mm->nz; 7433 } 7434 7435 /* 7436 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7437 7438 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into this process by other processes. 7439 7440 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7441 7442 off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert into other processes 7443 own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally 7444 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7445 7446 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7447 For example, coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros this process will receive.
7448 */ 7449 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7450 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7451 7452 /* gather (i,j) of nonzeros inserted by remote procs */ 7453 if (hasoffproc) { 7454 PetscSF msf; 7455 PetscInt ncoo2, *coo_i2, *coo_j2; 7456 7457 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7458 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7459 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7460 7461 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7462 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7463 PetscInt *idxoff = mmdata->off[cp]; 7464 PetscInt *idxown = mmdata->own[cp]; 7465 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7466 const PetscInt *rmap = rmapa[cp]; 7467 const PetscInt *cmap = cmapa[cp]; 7468 const PetscInt *ii = mm->i; 7469 PetscInt *coi = coo_i + ncoo_o; 7470 PetscInt *coj = coo_j + ncoo_o; 7471 const PetscInt mr = mp[cp]->rmap->n; 7472 const PetscInt rs = C->rmap->rstart; 7473 const PetscInt re = C->rmap->rend; 7474 const PetscInt cs = C->cmap->rstart; 7475 for (i = 0; i < mr; i++) { 7476 const PetscInt *jj = mm->j + ii[i]; 7477 const PetscInt gr = rmap[i]; 7478 const PetscInt nz = ii[i + 1] - ii[i]; 7479 if (gr < rs || gr >= re) { /* this is an offproc row */ 7480 for (j = ii[i]; j < ii[i + 1]; j++) { 7481 *coi++ = gr; 7482 *idxoff++ = j; 7483 } 7484 if (!cmapt[cp]) { /* already global */ 7485 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7486 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7487 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7488 } else { /* offdiag */ 7489 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7490 } 7491 ncoo_o += nz; 7492 } else { /* this is a local row */ 7493 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7494 } 7495 } 7496 } 7497 mmdata->off[cp + 1] = idxoff; 7498 mmdata->own[cp + 1] = idxown; 7499 } 7500 7501 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7502 PetscInt incoo_o; 7503 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7504 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7505 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7506 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7507 ncoo = ncoo_d + ncoo_oown + ncoo2; 7508 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7509 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7510 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7511 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7512 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7513 PetscCall(PetscFree2(coo_i, coo_j)); 7514 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7515 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7516 coo_i = coo_i2; 7517 coo_j = coo_j2; 7518 } else { /* no offproc values insertion */ 7519 ncoo = ncoo_d; 7520 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7521 7522 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7523 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7524 PetscCall(PetscSFSetUp(mmdata->sf)); 7525 } 7526 
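/* hasoffproc is saved for MatProductNumeric_MPIAIJBACKEND(); when it is PETSC_FALSE the SF created above has an empty graph and is used only by PetscSFMalloc() to obtain memory of the proper type */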
mmdata->hasoffproc = hasoffproc; 7527 7528 /* gather (i,j) of nonzeros inserted locally */ 7529 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7530 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7531 PetscInt *coi = coo_i + ncoo_d; 7532 PetscInt *coj = coo_j + ncoo_d; 7533 const PetscInt *jj = mm->j; 7534 const PetscInt *ii = mm->i; 7535 const PetscInt *cmap = cmapa[cp]; 7536 const PetscInt *rmap = rmapa[cp]; 7537 const PetscInt mr = mp[cp]->rmap->n; 7538 const PetscInt rs = C->rmap->rstart; 7539 const PetscInt re = C->rmap->rend; 7540 const PetscInt cs = C->cmap->rstart; 7541 7542 if (mptmp[cp]) continue; 7543 if (rmapt[cp] == 1) { /* consecutive rows */ 7544 /* fill coo_i */ 7545 for (i = 0; i < mr; i++) { 7546 const PetscInt gr = i + rs; 7547 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7548 } 7549 /* fill coo_j */ 7550 if (!cmapt[cp]) { /* type-0, already global */ 7551 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7552 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7553 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7554 } else { /* type-2, local to global for sparse columns */ 7555 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7556 } 7557 ncoo_d += mm->nz; 7558 } else if (rmapt[cp] == 2) { /* sparse rows */ 7559 for (i = 0; i < mr; i++) { 7560 const PetscInt *jj = mm->j + ii[i]; 7561 const PetscInt gr = rmap[i]; 7562 const PetscInt nz = ii[i + 1] - ii[i]; 7563 if (gr >= rs && gr < re) { /* local rows */ 7564 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7565 if (!cmapt[cp]) { /* type-0, already global */ 7566 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7567 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7568 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7569 } else { /* type-2, local to global for sparse columns */ 7570 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7571 } 7572 ncoo_d += nz; 7573 } 7574 } 7575 } 7576 } 7577 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7578 PetscCall(ISDestroy(&glob)); 7579 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7580 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7581 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7582 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7583 7584 /* set block sizes */ 7585 A = product->A; 7586 P = product->B; 7587 switch (ptype) { 7588 case MATPRODUCT_PtAP: 7589 PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs)); 7590 break; 7591 case MATPRODUCT_RARt: 7592 PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs)); 7593 break; 7594 case MATPRODUCT_ABC: 7595 PetscCall(MatSetBlockSizesFromMats(C, A, product->C)); 7596 break; 7597 case MATPRODUCT_AB: 7598 PetscCall(MatSetBlockSizesFromMats(C, A, P)); 7599 break; 7600 case MATPRODUCT_AtB: 7601 PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs)); 7602 break; 7603 case MATPRODUCT_ABt: 7604 PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs)); 7605 break; 7606 default: 7607 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]); 7608 } 7609 7610 /* preallocate with COO data */ 7611 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7612 PetscCall(PetscFree2(coo_i, coo_j)); 7613 PetscFunctionReturn(PETSC_SUCCESS); 7614 } 7615 7616 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7617 { 7618 Mat_Product *product = 
mat->product; 7619 #if defined(PETSC_HAVE_DEVICE) 7620 PetscBool match = PETSC_FALSE; 7621 PetscBool usecpu = PETSC_FALSE; 7622 #else 7623 PetscBool match = PETSC_TRUE; 7624 #endif 7625 7626 PetscFunctionBegin; 7627 MatCheckProduct(mat, 1); 7628 #if defined(PETSC_HAVE_DEVICE) 7629 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7630 if (match) { /* we can always fallback to the CPU if requested */ 7631 switch (product->type) { 7632 case MATPRODUCT_AB: 7633 if (product->api_user) { 7634 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7635 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7636 PetscOptionsEnd(); 7637 } else { 7638 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7639 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7640 PetscOptionsEnd(); 7641 } 7642 break; 7643 case MATPRODUCT_AtB: 7644 if (product->api_user) { 7645 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7646 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7647 PetscOptionsEnd(); 7648 } else { 7649 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7650 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7651 PetscOptionsEnd(); 7652 } 7653 break; 7654 case MATPRODUCT_PtAP: 7655 if (product->api_user) { 7656 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7657 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7658 PetscOptionsEnd(); 7659 } else { 7660 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7661 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7662 PetscOptionsEnd(); 7663 } 7664 break; 7665 default: 7666 break; 7667 } 7668 match = (PetscBool)!usecpu; 7669 } 7670 #endif 7671 if (match) { 7672 switch (product->type) { 7673 case MATPRODUCT_AB: 7674 case MATPRODUCT_AtB: 7675 case MATPRODUCT_PtAP: 7676 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7677 break; 7678 default: 7679 break; 7680 } 7681 } 7682 /* fallback to MPIAIJ ops */ 7683 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7684 PetscFunctionReturn(PETSC_SUCCESS); 7685 } 7686 7687 /* 7688 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7689 7690 n - the number of block indices in cc[] 7691 cc - the block indices (must be large enough to contain the indices) 7692 */ 7693 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7694 { 7695 PetscInt cnt = -1, nidx, j; 7696 const PetscInt *idx; 7697 7698 PetscFunctionBegin; 7699 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7700 if (nidx) { 7701 cnt = 0; 7702 cc[cnt] = idx[0] / bs; 7703 for (j = 1; j < nidx; j++) { 7704 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7705 } 7706 } 7707 
PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7708 *n = cnt + 1; 7709 PetscFunctionReturn(PETSC_SUCCESS); 7710 } 7711 7712 /* 7713 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7714 7715 ncollapsed - the number of block indices 7716 collapsed - the block indices (must be large enough to contain the indices) 7717 */ 7718 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7719 { 7720 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7721 7722 PetscFunctionBegin; 7723 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7724 for (i = start + 1; i < start + bs; i++) { 7725 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7726 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7727 cprevtmp = cprev; 7728 cprev = merged; 7729 merged = cprevtmp; 7730 } 7731 *ncollapsed = nprev; 7732 if (collapsed) *collapsed = cprev; 7733 PetscFunctionReturn(PETSC_SUCCESS); 7734 } 7735 7736 /* 7737 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7738 7739 Input Parameter: 7740 . Amat - matrix 7741 - symmetrize - make the result symmetric 7742 + scale - scale with diagonal 7743 7744 Output Parameter: 7745 . a_Gmat - output scalar graph >= 0 7746 7747 */ 7748 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7749 { 7750 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7751 MPI_Comm comm; 7752 Mat Gmat; 7753 PetscBool ismpiaij, isseqaij; 7754 Mat a, b, c; 7755 MatType jtype; 7756 7757 PetscFunctionBegin; 7758 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7759 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7760 PetscCall(MatGetSize(Amat, &MM, &NN)); 7761 PetscCall(MatGetBlockSize(Amat, &bs)); 7762 nloc = (Iend - Istart) / bs; 7763 7764 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7765 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7766 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7767 7768 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7769 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7770 implementation */ 7771 if (bs > 1) { 7772 PetscCall(MatGetType(Amat, &jtype)); 7773 PetscCall(MatCreate(comm, &Gmat)); 7774 PetscCall(MatSetType(Gmat, jtype)); 7775 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7776 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7777 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7778 PetscInt *d_nnz, *o_nnz; 7779 MatScalar *aa, val, *AA; 7780 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7781 7782 if (isseqaij) { 7783 a = Amat; 7784 b = NULL; 7785 } else { 7786 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7787 a = d->A; 7788 b = d->B; 7789 } 7790 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7791 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7792 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7793 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7794 const PetscInt *cols1, *cols2; 7795 7796 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7797 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7798 nnz[brow / bs] = nc2 / bs; 7799 if (nc2 % bs) ok = 0; 7800 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7801 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7802 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7803 if (nc1 != nc2) ok = 0; 7804 else { 7805 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7806 if (cols1[jj] != cols2[jj]) ok = 0; 7807 if (cols1[jj] % bs != jj % bs) ok = 0; 7808 } 7809 } 7810 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7811 } 7812 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7813 if (!ok) { 7814 PetscCall(PetscFree2(d_nnz, o_nnz)); 7815 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7816 goto old_bs; 7817 } 7818 } 7819 } 7820 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7821 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7822 PetscCall(PetscFree2(d_nnz, o_nnz)); 7823 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7824 // diag 7825 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7826 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7827 7828 ai = aseq->i; 7829 n = ai[brow + 1] - ai[brow]; 7830 aj = aseq->j + ai[brow]; 7831 for (PetscInt k = 0; k < n; k += bs) { // block columns 7832 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7833 val = 0; 7834 if (index_size == 0) { 7835 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7836 aa = aseq->a + ai[brow + ii] + k; 7837 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7838 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7839 } 7840 } 7841 } else { // use (index,index) value if provided 7842 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7843 PetscInt ii = index[iii]; 7844 aa = aseq->a + ai[brow + ii] + k; 7845 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7846 PetscInt jj = index[jjj]; 7847 val += PetscAbs(PetscRealPart(aa[jj])); 7848 } 7849 } 7850 } 7851 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7852 AA[k / bs] = val; 7853 } 7854 grow = Istart / bs + brow / bs; 7855 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7856 } 7857 // off-diag 7858 if (ismpiaij) { 7859 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7860 const PetscScalar *vals; 7861 const PetscInt *cols, *garray = aij->garray; 7862 7863 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7864 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7865 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7866 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7867 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7868 AA[k / bs] = 0; 7869 AJ[cidx] = garray[cols[k]] / bs; 7870 } 7871 nc = ncols / bs; 7872 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7873 if (index_size == 0) { 7874 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7875 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7876 for (PetscInt k = 0; k < ncols; k += bs) { 7877 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7878 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7879 AA[k / bs] 
+= PetscAbs(PetscRealPart(vals[k + jj])); 7880 } 7881 } 7882 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7883 } 7884 } else { // use (index,index) value if provided 7885 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7886 PetscInt ii = index[iii]; 7887 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7888 for (PetscInt k = 0; k < ncols; k += bs) { 7889 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7890 PetscInt jj = index[jjj]; 7891 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7892 } 7893 } 7894 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7895 } 7896 } 7897 grow = Istart / bs + brow / bs; 7898 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7899 } 7900 } 7901 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7902 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7903 PetscCall(PetscFree2(AA, AJ)); 7904 } else { 7905 const PetscScalar *vals; 7906 const PetscInt *idx; 7907 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7908 old_bs: 7909 /* 7910 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7911 */ 7912 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7913 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7914 if (isseqaij) { 7915 PetscInt max_d_nnz; 7916 7917 /* 7918 Determine exact preallocation count for (sequential) scalar matrix 7919 */ 7920 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7921 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7922 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7923 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7924 PetscCall(PetscFree3(w0, w1, w2)); 7925 } else if (ismpiaij) { 7926 Mat Daij, Oaij; 7927 const PetscInt *garray; 7928 PetscInt max_d_nnz; 7929 7930 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7931 /* 7932 Determine exact preallocation count for diagonal block portion of scalar matrix 7933 */ 7934 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7935 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7936 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7937 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7938 PetscCall(PetscFree3(w0, w1, w2)); 7939 /* 7940 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7941 */ 7942 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7943 o_nnz[jj] = 0; 7944 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7945 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7946 o_nnz[jj] += ncols; 7947 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7948 } 7949 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7950 } 7951 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7952 /* get scalar copy (norms) of matrix */ 7953 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7954 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7955 PetscCall(PetscFree2(d_nnz, o_nnz)); 7956 for (Ii = Istart; Ii < Iend; Ii++) { 7957 PetscInt dest_row = Ii / bs; 7958 7959 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7960 for (jj = 0; jj < ncols; jj++) { 7961 PetscInt dest_col = idx[jj] / bs; 7962 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7963 7964 PetscCall(MatSetValues(Gmat, 1, 
&dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7965 } 7966 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7967 } 7968 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7969 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7970 } 7971 } else { 7972 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7973 else { 7974 Gmat = Amat; 7975 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7976 } 7977 if (isseqaij) { 7978 a = Gmat; 7979 b = NULL; 7980 } else { 7981 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7982 a = d->A; 7983 b = d->B; 7984 } 7985 if (filter >= 0 || scale) { 7986 /* take absolute value of each entry */ 7987 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7988 MatInfo info; 7989 PetscScalar *avals; 7990 7991 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7992 PetscCall(MatSeqAIJGetArray(c, &avals)); 7993 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7994 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7995 } 7996 } 7997 } 7998 if (symmetrize) { 7999 PetscBool isset, issym; 8000 8001 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8002 if (!isset || !issym) { 8003 Mat matTrans; 8004 8005 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8006 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8007 PetscCall(MatDestroy(&matTrans)); 8008 } 8009 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8010 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8011 if (scale) { 8012 /* scale c for all diagonal values = 1 or -1 */ 8013 Vec diag; 8014 8015 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8016 PetscCall(MatGetDiagonal(Gmat, diag)); 8017 PetscCall(VecReciprocal(diag)); 8018 PetscCall(VecSqrtAbs(diag)); 8019 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8020 PetscCall(VecDestroy(&diag)); 8021 } 8022 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8023 if (filter >= 0) { 8024 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8025 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8026 } 8027 *a_Gmat = Gmat; 8028 PetscFunctionReturn(PETSC_SUCCESS); 8029 } 8030 8031 PETSC_INTERN PetscErrorCode MatGetCurrentMemType_MPIAIJ(Mat A, PetscMemType *memtype) 8032 { 8033 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 8034 PetscMemType mD = PETSC_MEMTYPE_HOST, mO = PETSC_MEMTYPE_HOST; 8035 8036 PetscFunctionBegin; 8037 if (mpiaij->A) PetscCall(MatGetCurrentMemType(mpiaij->A, &mD)); 8038 if (mpiaij->B) PetscCall(MatGetCurrentMemType(mpiaij->B, &mO)); 8039 *memtype = (mD == mO) ? mD : PETSC_MEMTYPE_HOST; 8040 PetscFunctionReturn(PETSC_SUCCESS); 8041 } 8042 8043 /* 8044 Special version for direct calls from Fortran 8045 */ 8046 8047 /* Change these macros so can be used in void function */ 8048 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8049 #undef PetscCall 8050 #define PetscCall(...) \ 8051 do { \ 8052 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8053 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8054 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8055 return; \ 8056 } \ 8057 } while (0) 8058 8059 #undef SETERRQ 8060 #define SETERRQ(comm, ierr, ...) 
\ 8061 do { \ 8062 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8063 return; \ 8064 } while (0) 8065 8066 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8067 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8068 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8069 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8070 #else 8071 #endif 8072 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8073 { 8074 Mat mat = *mmat; 8075 PetscInt m = *mm, n = *mn; 8076 InsertMode addv = *maddv; 8077 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8078 PetscScalar value; 8079 8080 MatCheckPreallocated(mat, 1); 8081 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8082 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8083 { 8084 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8085 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8086 PetscBool roworiented = aij->roworiented; 8087 8088 /* Some Variables required in the macro */ 8089 Mat A = aij->A; 8090 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8091 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8092 MatScalar *aa; 8093 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8094 Mat B = aij->B; 8095 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8096 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8097 MatScalar *ba; 8098 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8099 * cannot use "#if defined" inside a macro. 
*/ 8100 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8101 8102 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8103 PetscInt nonew = a->nonew; 8104 MatScalar *ap1, *ap2; 8105 8106 PetscFunctionBegin; 8107 PetscCall(MatSeqAIJGetArray(A, &aa)); 8108 PetscCall(MatSeqAIJGetArray(B, &ba)); 8109 for (i = 0; i < m; i++) { 8110 if (im[i] < 0) continue; 8111 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8112 if (im[i] >= rstart && im[i] < rend) { 8113 row = im[i] - rstart; 8114 lastcol1 = -1; 8115 rp1 = aj + ai[row]; 8116 ap1 = aa + ai[row]; 8117 rmax1 = aimax[row]; 8118 nrow1 = ailen[row]; 8119 low1 = 0; 8120 high1 = nrow1; 8121 lastcol2 = -1; 8122 rp2 = bj + bi[row]; 8123 ap2 = ba + bi[row]; 8124 rmax2 = bimax[row]; 8125 nrow2 = bilen[row]; 8126 low2 = 0; 8127 high2 = nrow2; 8128 8129 for (j = 0; j < n; j++) { 8130 if (roworiented) value = v[i * n + j]; 8131 else value = v[i + j * m]; 8132 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8133 if (in[j] >= cstart && in[j] < cend) { 8134 col = in[j] - cstart; 8135 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8136 } else if (in[j] < 0) continue; 8137 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8138 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8139 } else { 8140 if (mat->was_assembled) { 8141 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8142 #if defined(PETSC_USE_CTABLE) 8143 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8144 col--; 8145 #else 8146 col = aij->colmap[in[j]] - 1; 8147 #endif 8148 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8149 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8150 col = in[j]; 8151 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8152 B = aij->B; 8153 b = (Mat_SeqAIJ *)B->data; 8154 bimax = b->imax; 8155 bi = b->i; 8156 bilen = b->ilen; 8157 bj = b->j; 8158 rp2 = bj + bi[row]; 8159 ap2 = ba + bi[row]; 8160 rmax2 = bimax[row]; 8161 nrow2 = bilen[row]; 8162 low2 = 0; 8163 high2 = nrow2; 8164 bm = aij->B->rmap->n; 8165 ba = b->a; 8166 inserted = PETSC_FALSE; 8167 } 8168 } else col = in[j]; 8169 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8170 } 8171 } 8172 } else if (!aij->donotstash) { 8173 if (roworiented) { 8174 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8175 } else { 8176 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8177 } 8178 } 8179 } 8180 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8181 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8182 } 8183 PetscFunctionReturnVoid(); 8184 } 8185 8186 /* Undefining these here since they were redefined from their original definition above! No 8187 * other PETSc functions should be defined past this point, as it is impossible to recover the 8188 * original definitions */ 8189 #undef PetscCall 8190 #undef SETERRQ 8191