#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

static PetscErrorCode MatReset_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  /* Save the nonzero states of the component matrices because those are what are used to determine
     the nonzero state of mat */
  PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;

  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));
  PetscCall(MatSetUp_MPI_Hash(mat));
  aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`;
   the type also automatically switches over to use inodes when enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/
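/*
   A minimal usage sketch (editorial addition, not part of the manual pages above; the sizes
   and nonzero estimates are illustrative only): create an AIJ matrix and call both
   preallocation routines so the same code runs on one process or many. The same pattern
   applies to the variants, e.g. -mat_type aijcrl selects MATAIJCRL.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
     PetscCall(MatSetType(A, MATAIJ));
     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          // used when the communicator has one process
     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); // used when it has more than one
     // ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() as usual ...
     PetscCall(MatDestroy(&A));
*/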

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it, it is not scalable (each process
   stores an order-N integer array) but access is fast.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
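/*
   Editorial sketch of how the colmap built above is consumed (it mirrors the lookup in
   MatSetValues_MPIAIJ() further below; gcol stands for a hypothetical global column index):

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol)); // keys/values are shifted by 1 so 0 can mean "absent"
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1; // dense array of length mat->cmap->N, entry 0 means "absent"
   #endif
     // lcol < 0 here means gcol is not (yet) a column of the off-diagonal block B
*/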

#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops() will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij         = (Mat_MPIAIJ *)mat->data;
  PetscScalar value       = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B     = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
   The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
   No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If the column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
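/*
   Worked example for the symbolic copy above (the numbers are illustrative): with column
   ownership [cstart, cend) = [5, 10) and a row whose sorted global columns are {2, 6, 9, 12},
   columns 6 and 9 land in the diagonal block shifted to local indices {1, 4} (so ailen[row] = 2),
   while columns {2, 12} go to the off-diagonal block and keep their global indices
   (bilen[row] = 2) until MatSetUpMultiply_MPIAIJ() compacts them to local numbering.
*/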

/*
   This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
   The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
   No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
   Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
   would not be correct and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
  Mat          A    = aij->A; /* diagonal part of the matrix */
  Mat          B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all nonzero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If the column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
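/*
   Caller-level sketch of the semantics implemented by MatZeroRows_MPIAIJ() below
   (the rows, diag, x and b values are illustrative): each listed row is zeroed except
   for diag on the diagonal, and when both x and b are provided the right-hand side is
   adjusted so the prescribed solution values are preserved.

     PetscInt rows[] = {0, 7};                      // global row indices; any rank may list any row
     PetscCall(MatZeroRows(A, 2, rows, 1.0, x, b)); // sets b[r] = 1.0 * x[r] for each zeroed local row r
*/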
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layouts don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}
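/*
   Layout of the binary output written by MatView_MPIAIJ_Binary() below, as assembled from the
   code (the matching reader is MatLoad()): a four-entry header {MAT_FILE_CLASSID, M, N, global
   nonzero count}, then all global row lengths, then the column indices in global numbering
   (each row ordered as: off-diagonal entries left of the diagonal block, the diagonal block,
   the remaining off-diagonal entries), then the nonzero values in the same order.
*/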
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;

      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     info.memory));
      } else {
        PetscCall(
          PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;

      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
mat->cmap->N : 0, 0, 1, &iscol)); 1353 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1354 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1355 /* The commented code uses MatCreateSubMatrices instead */ 1356 /* 1357 Mat *AA, A = NULL, Av; 1358 IS isrow,iscol; 1359 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1361 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1362 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1363 if (rank == 0) { 1364 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1365 A = AA[0]; 1366 Av = AA[0]; 1367 } 1368 PetscCall(MatDestroySubMatrices(1,&AA)); 1369 */ 1370 PetscCall(ISDestroy(&iscol)); 1371 PetscCall(ISDestroy(&isrow)); 1372 /* 1373 Everyone has to call to draw the matrix since the graphics waits are 1374 synchronized across all processors that share the PetscDraw object 1375 */ 1376 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1377 if (rank == 0) { 1378 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1379 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1380 } 1381 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1382 PetscCall(MatDestroy(&A)); 1383 } 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1388 { 1389 PetscBool iascii, isdraw, issocket, isbinary; 1390 1391 PetscFunctionBegin; 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1395 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1396 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1397 PetscFunctionReturn(PETSC_SUCCESS); 1398 } 1399 1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1401 { 1402 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1403 Vec bb1 = NULL; 1404 PetscBool hasop; 1405 1406 PetscFunctionBegin; 1407 if (flag == SOR_APPLY_UPPER) { 1408 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1409 PetscFunctionReturn(PETSC_SUCCESS); 1410 } 1411 1412 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1413 1414 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 1420 while (its--) { 1421 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1423 1424 /* update rhs: bb1 = bb - B*x */ 1425 PetscCall(VecScale(mat->lvec, -1.0)); 1426 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1427 1428 /* local sweep */ 1429 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1430 } 1431 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1432 if (flag & SOR_ZERO_INITIAL_GUESS) { 1433 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1434 its--; 1435 } 1436 while (its--) { 1437 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1439 1440 /* update rhs: bb1 = bb - B*x */ 1441 PetscCall(VecScale(mat->lvec, -1.0)); 1442 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1443 1444 /* local sweep */ 1445 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1446 } 1447 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1448 if (flag & SOR_ZERO_INITIAL_GUESS) { 1449 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1450 its--; 1451 } 1452 while (its--) { 1453 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1455 1456 /* update rhs: bb1 = bb - B*x */ 1457 PetscCall(VecScale(mat->lvec, -1.0)); 1458 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1459 1460 /* local sweep */ 1461 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1462 } 1463 } else if (flag & SOR_EISENSTAT) { 1464 Vec xx1; 1465 1466 PetscCall(VecDuplicate(bb, &xx1)); 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1468 1469 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1470 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1471 if (!mat->diag) { 1472 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1473 PetscCall(MatGetDiagonal(matin, mat->diag)); 1474 } 1475 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1476 if (hasop) { 1477 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1478 } else { 1479 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1480 } 1481 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1482 1483 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1484 1485 /* local sweep */ 1486 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1487 PetscCall(VecAXPY(xx, 1.0, xx1)); 1488 PetscCall(VecDestroy(&xx1)); 1489 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1490 1491 PetscCall(VecDestroy(&bb1)); 1492 1493 matin->factorerrortype = mat->A->factorerrortype; 1494 PetscFunctionReturn(PETSC_SUCCESS); 1495 } 1496 1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1498 { 1499 Mat aA, aB, Aperm; 1500 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1501 PetscScalar *aa, *ba; 1502 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1503 PetscSF rowsf, sf; 1504 IS parcolp = NULL; 1505 PetscBool done; 1506 1507 PetscFunctionBegin; 1508 PetscCall(MatGetLocalSize(A, &m, &n)); 1509 PetscCall(ISGetIndices(rowp, &rwant)); 1510 PetscCall(ISGetIndices(colp, &cwant)); 1511 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1512 1513 /* Invert row permutation to find out where my rows should go */ 1514 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1515 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1516 PetscCall(PetscSFSetFromOptions(rowsf)); 1517 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1518 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1519 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1520 1521 /* Invert column permutation to find out where my columns should go */ 1522 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1523 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1524 PetscCall(PetscSFSetFromOptions(sf)); 1525 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1526 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1527 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1528 PetscCall(PetscSFDestroy(&sf)); 1529 1530 PetscCall(ISRestoreIndices(rowp, &rwant)); 1531 PetscCall(ISRestoreIndices(colp, &cwant)); 1532 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1533 1534 /* Find out where my gcols should go */ 1535 PetscCall(MatGetSize(aB, NULL, &ng)); 1536 PetscCall(PetscMalloc1(ng, &gcdest)); 1537 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1538 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1539 PetscCall(PetscSFSetFromOptions(sf)); 1540 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1541 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1542 PetscCall(PetscSFDestroy(&sf)); 1543 1544 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1545 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1546 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1547 for (i = 0; i < m; i++) { 1548 PetscInt row = rdest[i]; 1549 PetscMPIInt rowner; 1550 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1551 for (j = ai[i]; j < ai[i + 1]; j++) { 1552 PetscInt col = cdest[aj[j]]; 1553 PetscMPIInt cowner; 1554 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1555 if (rowner == cowner) dnnz[i]++; 1556 else onnz[i]++; 1557 } 1558 for (j = bi[i]; j < bi[i + 1]; j++) { 1559 PetscInt col = gcdest[bj[j]]; 1560 PetscMPIInt cowner; 1561 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1562 if (rowner == cowner) dnnz[i]++; 1563 else onnz[i]++; 1564 } 1565 } 1566 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1568 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1569 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1570 PetscCall(PetscSFDestroy(&rowsf)); 1571 1572 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1573 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1574 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1575 for (i = 0; i < m; i++) { 1576 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1577 PetscInt j0, rowlen; 1578 rowlen = ai[i + 1] - ai[i]; 1579 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1580 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1581 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1582 } 1583 rowlen = bi[i + 1] - bi[i]; 1584 for (j0 = j = 0; j < rowlen; j0 = j) { 1585 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1586 
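/* flush this batch of remapped off-diagonal column indices into Aperm; bcols reuses the
   length-m onnz array, so a batch can never exceed m entries (hence the PetscMin(rowlen, j0 + m)
   bound in the gather loop above) */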
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1587 } 1588 } 1589 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1591 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1592 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1593 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1594 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1595 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1596 PetscCall(PetscFree3(work, rdest, cdest)); 1597 PetscCall(PetscFree(gcdest)); 1598 if (parcolp) PetscCall(ISDestroy(&colp)); 1599 *B = Aperm; 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1604 { 1605 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1606 1607 PetscFunctionBegin; 1608 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1609 if (ghosts) *ghosts = aij->garray; 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1614 { 1615 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1616 Mat A = mat->A, B = mat->B; 1617 PetscLogDouble isend[5], irecv[5]; 1618 1619 PetscFunctionBegin; 1620 info->block_size = 1.0; 1621 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1622 1623 isend[0] = info->nz_used; 1624 isend[1] = info->nz_allocated; 1625 isend[2] = info->nz_unneeded; 1626 isend[3] = info->memory; 1627 isend[4] = info->mallocs; 1628 1629 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1630 1631 isend[0] += info->nz_used; 1632 isend[1] += info->nz_allocated; 1633 isend[2] += info->nz_unneeded; 1634 isend[3] += info->memory; 1635 isend[4] += info->mallocs; 1636 if (flag == MAT_LOCAL) { 1637 info->nz_used = isend[0]; 1638 info->nz_allocated = isend[1]; 1639 info->nz_unneeded = isend[2]; 1640 info->memory = isend[3]; 1641 info->mallocs = isend[4]; 1642 } else if (flag == MAT_GLOBAL_MAX) { 1643 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1644 1645 info->nz_used = irecv[0]; 1646 info->nz_allocated = irecv[1]; 1647 info->nz_unneeded = irecv[2]; 1648 info->memory = irecv[3]; 1649 info->mallocs = irecv[4]; 1650 } else if (flag == MAT_GLOBAL_SUM) { 1651 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1652 1653 info->nz_used = irecv[0]; 1654 info->nz_allocated = irecv[1]; 1655 info->nz_unneeded = irecv[2]; 1656 info->memory = irecv[3]; 1657 info->mallocs = irecv[4]; 1658 } 1659 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1660 info->fill_ratio_needed = 0; 1661 info->factor_mallocs = 0; 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1666 { 1667 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1668 1669 PetscFunctionBegin; 1670 switch (op) { 1671 case MAT_NEW_NONZERO_LOCATIONS: 1672 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1673 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1674 case MAT_KEEP_NONZERO_PATTERN: 1675 case MAT_NEW_NONZERO_LOCATION_ERR: 1676 case MAT_USE_INODES: 1677 case MAT_IGNORE_ZERO_ENTRIES: 1678 case MAT_FORM_EXPLICIT_TRANSPOSE: 1679 MatCheckPreallocated(A, 1); 1680 PetscCall(MatSetOption(a->A, op, flg)); 1681 PetscCall(MatSetOption(a->B, op, flg)); 1682 break; 1683 case MAT_ROW_ORIENTED: 1684 MatCheckPreallocated(A, 1); 1685 a->roworiented = flg; 1686 1687 
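/* propagate the orientation to both sequential blocks so that MatSetValues() interprets
         caller-supplied value arrays the same way everywhere; a caller would toggle this with,
         e.g., MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE) before inserting column-oriented data */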
PetscCall(MatSetOption(a->A, op, flg)); 1688 PetscCall(MatSetOption(a->B, op, flg)); 1689 break; 1690 case MAT_IGNORE_OFF_PROC_ENTRIES: 1691 a->donotstash = flg; 1692 break; 1693 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1694 case MAT_SPD: 1695 case MAT_SYMMETRIC: 1696 case MAT_STRUCTURALLY_SYMMETRIC: 1697 case MAT_HERMITIAN: 1698 case MAT_SYMMETRY_ETERNAL: 1699 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1700 case MAT_SPD_ETERNAL: 1701 /* if the diagonal matrix is square it inherits some of the properties above */ 1702 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1703 break; 1704 case MAT_SUBMAT_SINGLEIS: 1705 A->submat_singleis = flg; 1706 break; 1707 default: 1708 break; 1709 } 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1714 { 1715 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1716 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1717 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1718 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1719 PetscInt *cmap, *idx_p; 1720 1721 PetscFunctionBegin; 1722 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1723 mat->getrowactive = PETSC_TRUE; 1724 1725 if (!mat->rowvalues && (idx || v)) { 1726 /* 1727 allocate enough space to hold information from the longest row. 1728 */ 1729 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1730 PetscInt max = 1, tmp; 1731 for (i = 0; i < matin->rmap->n; i++) { 1732 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1733 if (max < tmp) max = tmp; 1734 } 1735 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1736 } 1737 1738 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1739 lrow = row - rstart; 1740 1741 pvA = &vworkA; 1742 pcA = &cworkA; 1743 pvB = &vworkB; 1744 pcB = &cworkB; 1745 if (!v) { 1746 pvA = NULL; 1747 pvB = NULL; 1748 } 1749 if (!idx) { 1750 pcA = NULL; 1751 if (!v) pcB = NULL; 1752 } 1753 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1754 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1755 nztot = nzA + nzB; 1756 1757 cmap = mat->garray; 1758 if (v || idx) { 1759 if (nztot) { 1760 /* Sort by increasing column numbers, assuming A and B already sorted */ 1761 PetscInt imark = -1; 1762 if (v) { 1763 *v = v_p = mat->rowvalues; 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1766 else break; 1767 } 1768 imark = i; 1769 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1770 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1771 } 1772 if (idx) { 1773 *idx = idx_p = mat->rowindices; 1774 if (imark > -1) { 1775 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1776 } else { 1777 for (i = 0; i < nzB; i++) { 1778 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1779 else break; 1780 } 1781 imark = i; 1782 } 1783 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1784 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1785 } 1786 } else { 1787 if (idx) *idx = NULL; 1788 if (v) *v = NULL; 1789 } 1790 } 1791 *nz = nztot; 1792 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1793 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1794 
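/* The merged row stays valid until MatRestoreRow() clears getrowactive in
     MatRestoreRow_MPIAIJ() below. A typical caller sequence is (sketch only; A and row
     stand for the caller's matrix and a locally owned row):

       PetscInt           ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
       ... read cols[0..ncols-1] and vals[0..ncols-1] ...
       PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  */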
PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 1801 PetscFunctionBegin; 1802 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1803 aij->getrowactive = PETSC_FALSE; 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1808 { 1809 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1810 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1811 PetscInt i, j, cstart = mat->cmap->rstart; 1812 PetscReal sum = 0.0; 1813 const MatScalar *v, *amata, *bmata; 1814 1815 PetscFunctionBegin; 1816 if (aij->size == 1) { 1817 PetscCall(MatNorm(aij->A, type, norm)); 1818 } else { 1819 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1820 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1821 if (type == NORM_FROBENIUS) { 1822 v = amata; 1823 for (i = 0; i < amat->nz; i++) { 1824 sum += PetscRealPart(PetscConj(*v) * (*v)); 1825 v++; 1826 } 1827 v = bmata; 1828 for (i = 0; i < bmat->nz; i++) { 1829 sum += PetscRealPart(PetscConj(*v) * (*v)); 1830 v++; 1831 } 1832 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1833 *norm = PetscSqrtReal(*norm); 1834 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1835 } else if (type == NORM_1) { /* max column norm */ 1836 PetscReal *tmp; 1837 PetscInt *jj, *garray = aij->garray; 1838 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1839 *norm = 0.0; 1840 v = amata; 1841 jj = amat->j; 1842 for (j = 0; j < amat->nz; j++) { 1843 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1844 v++; 1845 } 1846 v = bmata; 1847 jj = bmat->j; 1848 for (j = 0; j < bmat->nz; j++) { 1849 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1850 v++; 1851 } 1852 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1853 for (j = 0; j < mat->cmap->N; j++) { 1854 if (tmp[j] > *norm) *norm = tmp[j]; 1855 } 1856 PetscCall(PetscFree(tmp)); 1857 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1858 } else if (type == NORM_INFINITY) { /* max row norm */ 1859 PetscReal ntemp = 0.0; 1860 for (j = 0; j < aij->A->rmap->n; j++) { 1861 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1862 sum = 0.0; 1863 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1864 sum += PetscAbsScalar(*v); 1865 v++; 1866 } 1867 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1868 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1869 sum += PetscAbsScalar(*v); 1870 v++; 1871 } 1872 if (sum > ntemp) ntemp = sum; 1873 } 1874 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1875 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1876 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1877 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1878 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1879 } 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1884 { 1885 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1886 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1887 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, 
*cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1888 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1889 Mat B, A_diag, *B_diag; 1890 const MatScalar *pbv, *bv; 1891 1892 PetscFunctionBegin; 1893 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1894 ma = A->rmap->n; 1895 na = A->cmap->n; 1896 mb = a->B->rmap->n; 1897 nb = a->B->cmap->n; 1898 ai = Aloc->i; 1899 aj = Aloc->j; 1900 bi = Bloc->i; 1901 bj = Bloc->j; 1902 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1903 PetscInt *d_nnz, *g_nnz, *o_nnz; 1904 PetscSFNode *oloc; 1905 PETSC_UNUSED PetscSF sf; 1906 1907 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1908 /* compute d_nnz for preallocation */ 1909 PetscCall(PetscArrayzero(d_nnz, na)); 1910 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1911 /* compute local off-diagonal contributions */ 1912 PetscCall(PetscArrayzero(g_nnz, nb)); 1913 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1914 /* map those to global */ 1915 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1916 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1917 PetscCall(PetscSFSetFromOptions(sf)); 1918 PetscCall(PetscArrayzero(o_nnz, na)); 1919 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1920 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1921 PetscCall(PetscSFDestroy(&sf)); 1922 1923 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1924 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1925 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1926 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1927 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1928 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1929 } else { 1930 B = *matout; 1931 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1932 } 1933 1934 b = (Mat_MPIAIJ *)B->data; 1935 A_diag = a->A; 1936 B_diag = &b->A; 1937 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1938 A_diag_ncol = A_diag->cmap->N; 1939 B_diag_ilen = sub_B_diag->ilen; 1940 B_diag_i = sub_B_diag->i; 1941 1942 /* Set ilen for diagonal of B */ 1943 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1944 1945 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1946 very quickly (=without using MatSetValues), because all writes are local. 
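MatTransposeSetPrecursor() below marks *B_diag as structurally compatible with the
     transpose of A_diag, which is what allows the MAT_REUSE_MATRIX call to succeed even
     though *B_diag was not created by a previous MatTranspose().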
*/ 1947 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1948 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1949 1950 /* copy over the B part */ 1951 PetscCall(PetscMalloc1(bi[mb], &cols)); 1952 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1953 pbv = bv; 1954 row = A->rmap->rstart; 1955 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1956 cols_tmp = cols; 1957 for (i = 0; i < mb; i++) { 1958 ncol = bi[i + 1] - bi[i]; 1959 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1960 row++; 1961 if (pbv) pbv += ncol; 1962 if (cols_tmp) cols_tmp += ncol; 1963 } 1964 PetscCall(PetscFree(cols)); 1965 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1966 1967 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1968 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1969 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1970 *matout = B; 1971 } else { 1972 PetscCall(MatHeaderMerge(A, &B)); 1973 } 1974 PetscFunctionReturn(PETSC_SUCCESS); 1975 } 1976 1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1978 { 1979 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1980 Mat a = aij->A, b = aij->B; 1981 PetscInt s1, s2, s3; 1982 1983 PetscFunctionBegin; 1984 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1985 if (rr) { 1986 PetscCall(VecGetLocalSize(rr, &s1)); 1987 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1988 /* Overlap communication with computation. */ 1989 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1990 } 1991 if (ll) { 1992 PetscCall(VecGetLocalSize(ll, &s1)); 1993 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1994 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1995 } 1996 /* scale the diagonal block */ 1997 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1998 1999 if (rr) { 2000 /* Do a scatter end and then right scale the off-diagonal block */ 2001 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2002 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2003 } 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2010 2011 PetscFunctionBegin; 2012 PetscCall(MatSetUnfactored(a->A)); 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2017 { 2018 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2019 Mat a, b, c, d; 2020 PetscBool flg; 2021 2022 PetscFunctionBegin; 2023 a = matA->A; 2024 b = matA->B; 2025 c = matB->A; 2026 d = matB->B; 2027 2028 PetscCall(MatEqual(a, c, &flg)); 2029 if (flg) PetscCall(MatEqual(b, d, &flg)); 2030 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2035 { 2036 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2037 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2038 2039 PetscFunctionBegin; 2040 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
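In that case we fall back to MatCopy_Basic(), which transfers the values row by row
     through MatGetRow()/MatSetValues() and needs only matching global sizes.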
*/ 2041 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2042 /* because of the column compression in the off-processor part of the matrix a->B, 2043 the number of columns in a->B and b->B may be different, hence we cannot call 2044 the MatCopy() directly on the two parts. If need be, we can provide a more 2045 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2046 then copying the submatrices */ 2047 PetscCall(MatCopy_Basic(A, B, str)); 2048 } else { 2049 PetscCall(MatCopy(a->A, b->A, str)); 2050 PetscCall(MatCopy(a->B, b->B, str)); 2051 } 2052 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2053 PetscFunctionReturn(PETSC_SUCCESS); 2054 } 2055 2056 /* 2057 Computes the number of nonzeros per row needed for preallocation when X and Y 2058 have different nonzero structure. 2059 */ 2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2061 { 2062 PetscInt i, j, k, nzx, nzy; 2063 2064 PetscFunctionBegin; 2065 /* Set the number of nonzeros in the new matrix */ 2066 for (i = 0; i < m; i++) { 2067 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2068 nzx = xi[i + 1] - xi[i]; 2069 nzy = yi[i + 1] - yi[i]; 2070 nnz[i] = 0; 2071 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2072 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2073 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2074 nnz[i]++; 2075 } 2076 for (; k < nzy; k++) nnz[i]++; 2077 } 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2083 { 2084 PetscInt m = Y->rmap->N; 2085 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2086 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2087 2088 PetscFunctionBegin; 2089 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2094 { 2095 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2096 2097 PetscFunctionBegin; 2098 if (str == SAME_NONZERO_PATTERN) { 2099 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2100 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2101 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2102 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2103 } else { 2104 Mat B; 2105 PetscInt *nnz_d, *nnz_o; 2106 2107 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2108 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2109 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2110 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2111 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2112 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2113 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2114 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2115 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2116 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2117 PetscCall(MatHeaderMerge(Y, &B)); 2118 PetscCall(PetscFree(nnz_d)); 
2119 PetscCall(PetscFree(nnz_o)); 2120 } 2121 PetscFunctionReturn(PETSC_SUCCESS); 2122 } 2123 2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2125 2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2127 { 2128 PetscFunctionBegin; 2129 if (PetscDefined(USE_COMPLEX)) { 2130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2131 2132 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2133 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2134 } 2135 PetscFunctionReturn(PETSC_SUCCESS); 2136 } 2137 2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2139 { 2140 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2141 2142 PetscFunctionBegin; 2143 PetscCall(MatRealPart(a->A)); 2144 PetscCall(MatRealPart(a->B)); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2149 { 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2151 2152 PetscFunctionBegin; 2153 PetscCall(MatImaginaryPart(a->A)); 2154 PetscCall(MatImaginaryPart(a->B)); 2155 PetscFunctionReturn(PETSC_SUCCESS); 2156 } 2157 2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2159 { 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2161 PetscInt i, *idxb = NULL, m = A->rmap->n; 2162 PetscScalar *vv; 2163 Vec vB, vA; 2164 const PetscScalar *va, *vb; 2165 2166 PetscFunctionBegin; 2167 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2168 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2169 2170 PetscCall(VecGetArrayRead(vA, &va)); 2171 if (idx) { 2172 for (i = 0; i < m; i++) { 2173 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2174 } 2175 } 2176 2177 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2178 PetscCall(PetscMalloc1(m, &idxb)); 2179 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2180 2181 PetscCall(VecGetArrayWrite(v, &vv)); 2182 PetscCall(VecGetArrayRead(vB, &vb)); 2183 for (i = 0; i < m; i++) { 2184 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2185 vv[i] = vb[i]; 2186 if (idx) idx[i] = a->garray[idxb[i]]; 2187 } else { 2188 vv[i] = va[i]; 2189 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2190 } 2191 } 2192 PetscCall(VecRestoreArrayWrite(v, &vv)); 2193 PetscCall(VecRestoreArrayRead(vA, &va)); 2194 PetscCall(VecRestoreArrayRead(vB, &vb)); 2195 PetscCall(PetscFree(idxb)); 2196 PetscCall(VecDestroy(&vA)); 2197 PetscCall(VecDestroy(&vB)); 2198 PetscFunctionReturn(PETSC_SUCCESS); 2199 } 2200 2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2204 Vec vB, vA; 2205 2206 PetscFunctionBegin; 2207 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2208 PetscCall(MatGetRowSumAbs(a->A, vA)); 2209 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2210 PetscCall(MatGetRowSumAbs(a->B, vB)); 2211 PetscCall(VecAXPY(vA, 1.0, vB)); 2212 PetscCall(VecDestroy(&vB)); 2213 PetscCall(VecCopy(vA, v)); 2214 PetscCall(VecDestroy(&vA)); 2215 PetscFunctionReturn(PETSC_SUCCESS); 2216 } 2217 2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2219 { 2220 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2221 PetscInt m = A->rmap->n, n = A->cmap->n; 2222 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2223 PetscInt *cmap = mat->garray; 2224 PetscInt *diagIdx, *offdiagIdx; 2225 Vec diagV, offdiagV; 2226 PetscScalar *a, *diagA, *offdiagA; 2227 const PetscScalar *ba, *bav; 2228 PetscInt r, j, col, ncols, *bi, *bj; 2229 Mat B = mat->B; 2230 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2231 2232 PetscFunctionBegin; 2233 /* When a process holds entire A and other 
processes have no entry */
2234 if (A->cmap->N == n) {
2235 PetscCall(VecGetArrayWrite(v, &diagA));
2236 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2237 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2238 PetscCall(VecDestroy(&diagV));
2239 PetscCall(VecRestoreArrayWrite(v, &diagA));
2240 PetscFunctionReturn(PETSC_SUCCESS);
2241 } else if (n == 0) {
2242 if (m) {
2243 PetscCall(VecGetArrayWrite(v, &a));
2244 for (r = 0; r < m; r++) {
2245 a[r] = 0.0;
2246 if (idx) idx[r] = -1;
2247 }
2248 PetscCall(VecRestoreArrayWrite(v, &a));
2249 }
2250 PetscFunctionReturn(PETSC_SUCCESS);
2251 }
2252
2253 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2255 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2256 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2257
2258 /* Get offdiagIdx[] for implicit 0.0 */
2259 PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2260 ba = bav;
2261 bi = b->i;
2262 bj = b->j;
2263 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2264 for (r = 0; r < m; r++) {
2265 ncols = bi[r + 1] - bi[r];
2266 if (ncols == A->cmap->N - n) { /* Brow is dense */
2267 offdiagA[r] = *ba;
2268 offdiagIdx[r] = cmap[0];
2269 } else { /* Brow is sparse so we already KNOW the minimum in magnitude is 0.0 */
2270 offdiagA[r] = 0.0;
2271
2272 /* Find first hole in the cmap */
2273 for (j = 0; j < ncols; j++) {
2274 col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2275 if (col > j && j < cstart) {
2276 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2277 break;
2278 } else if (col > j + n && j >= cstart) {
2279 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2280 break;
2281 }
2282 }
2283 if (j == ncols && ncols < A->cmap->N - n) {
2284 /* a hole is outside compressed Bcols */
2285 if (ncols == 0) {
2286 if (cstart) {
2287 offdiagIdx[r] = 0;
2288 } else offdiagIdx[r] = cend;
2289 } else { /* ncols > 0 */
2290 offdiagIdx[r] = cmap[ncols - 1] + 1;
2291 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2292 }
2293 }
2294 }
2295
2296 for (j = 0; j < ncols; j++) {
2297 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2298 offdiagA[r] = *ba;
2299 offdiagIdx[r] = cmap[*bj];
2300 }
2301 ba++;
2302 bj++;
2303 }
2304 }
2305
2306 PetscCall(VecGetArrayWrite(v, &a));
2307 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2308 for (r = 0; r < m; ++r) {
2309 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2310 a[r] = diagA[r];
2311 if (idx) idx[r] = cstart + diagIdx[r];
2312 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2313 a[r] = diagA[r];
2314 if (idx) {
2315 if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2316 idx[r] = cstart + diagIdx[r];
2317 } else idx[r] = offdiagIdx[r];
2318 }
2319 } else {
2320 a[r] = offdiagA[r];
2321 if (idx) idx[r] = offdiagIdx[r];
2322 }
2323 }
2324 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2325 PetscCall(VecRestoreArrayWrite(v, &a));
2326 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2327 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2328 PetscCall(VecDestroy(&diagV));
2329 PetscCall(VecDestroy(&offdiagV));
2330 PetscCall(PetscFree2(diagIdx, offdiagIdx));
2331 PetscFunctionReturn(PETSC_SUCCESS);
2332 }
2333
2334 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2335 {
2336 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
2337 PetscInt m = A->rmap->n, n = A->cmap->n;
2338 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend;
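/* Strategy, shared with the RowMinAbs/RowMax variants above and below: compute the
   per-row extremum of the diagonal block A and of the off-diagonal block B separately
   (a compressed B row also stands for implicit 0.0 entries), then merge the two results,
   breaking ties in favor of the smaller global column index. */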
2339 PetscInt *cmap = mat->garray;
2340 PetscInt *diagIdx, *offdiagIdx;
2341 Vec diagV, offdiagV;
2342 PetscScalar *a, *diagA, *offdiagA;
2343 const PetscScalar *ba, *bav;
2344 PetscInt r, j, col, ncols, *bi, *bj;
2345 Mat B = mat->B;
2346 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
2347
2348 PetscFunctionBegin;
2349 /* When a process holds entire A and other processes have no entry */
2350 if (A->cmap->N == n) {
2351 PetscCall(VecGetArrayWrite(v, &diagA));
2352 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2353 PetscCall(MatGetRowMin(mat->A, diagV, idx));
2354 PetscCall(VecDestroy(&diagV));
2355 PetscCall(VecRestoreArrayWrite(v, &diagA));
2356 PetscFunctionReturn(PETSC_SUCCESS);
2357 } else if (n == 0) {
2358 if (m) {
2359 PetscCall(VecGetArrayWrite(v, &a));
2360 for (r = 0; r < m; r++) {
2361 a[r] = PETSC_MAX_REAL;
2362 if (idx) idx[r] = -1;
2363 }
2364 PetscCall(VecRestoreArrayWrite(v, &a));
2365 }
2366 PetscFunctionReturn(PETSC_SUCCESS);
2367 }
2368
2369 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2370 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2371 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2372 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2373
2374 /* Get offdiagIdx[] for implicit 0.0 */
2375 PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2376 ba = bav;
2377 bi = b->i;
2378 bj = b->j;
2379 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2380 for (r = 0; r < m; r++) {
2381 ncols = bi[r + 1] - bi[r];
2382 if (ncols == A->cmap->N - n) { /* Brow is dense */
2383 offdiagA[r] = *ba;
2384 offdiagIdx[r] = cmap[0];
2385 } else { /* Brow is sparse so we already KNOW the minimum is 0.0 or lower */
2386 offdiagA[r] = 0.0;
2387
2388 /* Find first hole in the cmap */
2389 for (j = 0; j < ncols; j++) {
2390 col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2391 if (col > j && j < cstart) {
2392 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2393 break;
2394 } else if (col > j + n && j >= cstart) {
2395 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2396 break;
2397 }
2398 }
2399 if (j == ncols && ncols < A->cmap->N - n) {
2400 /* a hole is outside compressed Bcols */
2401 if (ncols == 0) {
2402 if (cstart) {
2403 offdiagIdx[r] = 0;
2404 } else offdiagIdx[r] = cend;
2405 } else { /* ncols > 0 */
2406 offdiagIdx[r] = cmap[ncols - 1] + 1;
2407 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2408 }
2409 }
2410 }
2411
2412 for (j = 0; j < ncols; j++) {
2413 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2414 offdiagA[r] = *ba;
2415 offdiagIdx[r] = cmap[*bj];
2416 }
2417 ba++;
2418 bj++;
2419 }
2420 }
2421
2422 PetscCall(VecGetArrayWrite(v, &a));
2423 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2424 for (r = 0; r < m; ++r) {
2425 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2426 a[r] = diagA[r];
2427 if (idx) idx[r] = cstart + diagIdx[r];
2428 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2429 a[r] = diagA[r];
2430 if (idx) {
2431 if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2432 idx[r] = cstart + diagIdx[r];
2433 } else idx[r] = offdiagIdx[r];
2434 }
2435 } else {
2436 a[r] = offdiagA[r];
2437 if (idx) idx[r] = offdiagIdx[r];
2438 }
2439 }
2440 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2441 PetscCall(VecRestoreArrayWrite(v, &a));
2442 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2443 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2444 PetscCall(VecDestroy(&diagV));
2445
PetscCall(VecDestroy(&offdiagV)); 2446 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2447 PetscFunctionReturn(PETSC_SUCCESS); 2448 } 2449 2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2451 { 2452 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2453 PetscInt m = A->rmap->n, n = A->cmap->n; 2454 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2455 PetscInt *cmap = mat->garray; 2456 PetscInt *diagIdx, *offdiagIdx; 2457 Vec diagV, offdiagV; 2458 PetscScalar *a, *diagA, *offdiagA; 2459 const PetscScalar *ba, *bav; 2460 PetscInt r, j, col, ncols, *bi, *bj; 2461 Mat B = mat->B; 2462 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2463 2464 PetscFunctionBegin; 2465 /* When a process holds entire A and other processes have no entry */ 2466 if (A->cmap->N == n) { 2467 PetscCall(VecGetArrayWrite(v, &diagA)); 2468 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2469 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2470 PetscCall(VecDestroy(&diagV)); 2471 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2472 PetscFunctionReturn(PETSC_SUCCESS); 2473 } else if (n == 0) { 2474 if (m) { 2475 PetscCall(VecGetArrayWrite(v, &a)); 2476 for (r = 0; r < m; r++) { 2477 a[r] = PETSC_MIN_REAL; 2478 if (idx) idx[r] = -1; 2479 } 2480 PetscCall(VecRestoreArrayWrite(v, &a)); 2481 } 2482 PetscFunctionReturn(PETSC_SUCCESS); 2483 } 2484 2485 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2486 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2487 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2488 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2489 2490 /* Get offdiagIdx[] for implicit 0.0 */ 2491 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2492 ba = bav; 2493 bi = b->i; 2494 bj = b->j; 2495 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2496 for (r = 0; r < m; r++) { 2497 ncols = bi[r + 1] - bi[r]; 2498 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2499 offdiagA[r] = *ba; 2500 offdiagIdx[r] = cmap[0]; 2501 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2502 offdiagA[r] = 0.0; 2503 2504 /* Find first hole in the cmap */ 2505 for (j = 0; j < ncols; j++) { 2506 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2507 if (col > j && j < cstart) { 2508 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2509 break; 2510 } else if (col > j + n && j >= cstart) { 2511 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2512 break; 2513 } 2514 } 2515 if (j == ncols && ncols < A->cmap->N - n) { 2516 /* a hole is outside compressed Bcols */ 2517 if (ncols == 0) { 2518 if (cstart) { 2519 offdiagIdx[r] = 0; 2520 } else offdiagIdx[r] = cend; 2521 } else { /* ncols > 0 */ 2522 offdiagIdx[r] = cmap[ncols - 1] + 1; 2523 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2524 } 2525 } 2526 } 2527 2528 for (j = 0; j < ncols; j++) { 2529 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2530 offdiagA[r] = *ba; 2531 offdiagIdx[r] = cmap[*bj]; 2532 } 2533 ba++; 2534 bj++; 2535 } 2536 } 2537 2538 PetscCall(VecGetArrayWrite(v, &a)); 2539 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2540 for (r = 0; r < m; ++r) { 2541 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2542 a[r] = diagA[r]; 2543 if (idx) idx[r] = cstart + diagIdx[r]; 2544 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2545 a[r] = diagA[r]; 2546 if (idx) { 2547 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2548 idx[r] = cstart + diagIdx[r]; 2549 } else idx[r] = offdiagIdx[r]; 2550 } 2551 } 
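/* the off-diagonal block holds the strictly larger value, so report it */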
} else {
2552 a[r] = offdiagA[r];
2553 if (idx) idx[r] = offdiagIdx[r];
2554 }
2555 }
2556 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2557 PetscCall(VecRestoreArrayWrite(v, &a));
2558 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2559 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2560 PetscCall(VecDestroy(&diagV));
2561 PetscCall(VecDestroy(&offdiagV));
2562 PetscCall(PetscFree2(diagIdx, offdiagIdx));
2563 PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565
2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2567 {
2568 Mat *dummy;
2569
2570 PetscFunctionBegin;
2571 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2572 *newmat = *dummy;
2573 PetscCall(PetscFree(dummy));
2574 PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576
2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2578 {
2579 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2580
2581 PetscFunctionBegin;
2582 PetscCall(MatInvertBlockDiagonal(a->A, values));
2583 A->factorerrortype = a->A->factorerrortype;
2584 PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586
2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2588 {
2589 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2590
2591 PetscFunctionBegin;
2592 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2593 PetscCall(MatSetRandom(aij->A, rctx));
2594 if (x->assembled) {
2595 PetscCall(MatSetRandom(aij->B, rctx));
2596 } else {
2597 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2598 }
2599 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2600 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2601 PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603
2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2605 {
2606 PetscFunctionBegin;
2607 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2608 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2609 PetscFunctionReturn(PETSC_SUCCESS);
2610 }
2611
2612 /*@
2613 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2614
2615 Not Collective
2616
2617 Input Parameter:
2618 . A - the matrix
2619
2620 Output Parameter:
2621 . 
nz - the number of nonzeros 2622 2623 Level: advanced 2624 2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2626 @*/ 2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2628 { 2629 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2630 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2631 PetscBool isaij; 2632 2633 PetscFunctionBegin; 2634 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2635 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2636 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2637 PetscFunctionReturn(PETSC_SUCCESS); 2638 } 2639 2640 /*@ 2641 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2642 2643 Collective 2644 2645 Input Parameters: 2646 + A - the matrix 2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2648 2649 Level: advanced 2650 2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2652 @*/ 2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2654 { 2655 PetscFunctionBegin; 2656 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2657 PetscFunctionReturn(PETSC_SUCCESS); 2658 } 2659 2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2661 { 2662 PetscBool sc = PETSC_FALSE, flg; 2663 2664 PetscFunctionBegin; 2665 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2666 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2667 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2668 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2669 PetscOptionsHeadEnd(); 2670 PetscFunctionReturn(PETSC_SUCCESS); 2671 } 2672 2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2674 { 2675 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2676 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2677 2678 PetscFunctionBegin; 2679 if (!Y->preallocated) { 2680 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2681 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
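Reserving a single entry per row should be enough for the diagonal insertions that
     MatShift_Basic() performs below, so those MatSetValues() calls normally trigger no
     further mallocs.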
*/ 2682 PetscInt nonew = aij->nonew; 2683 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2684 aij->nonew = nonew; 2685 } 2686 PetscCall(MatShift_Basic(Y, a)); 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2691 { 2692 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2693 2694 PetscFunctionBegin; 2695 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2696 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2697 if (d) { 2698 PetscInt rstart; 2699 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2700 *d += rstart; 2701 } 2702 PetscFunctionReturn(PETSC_SUCCESS); 2703 } 2704 2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2708 2709 PetscFunctionBegin; 2710 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2715 { 2716 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2717 2718 PetscFunctionBegin; 2719 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2720 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2725 MatGetRow_MPIAIJ, 2726 MatRestoreRow_MPIAIJ, 2727 MatMult_MPIAIJ, 2728 /* 4*/ MatMultAdd_MPIAIJ, 2729 MatMultTranspose_MPIAIJ, 2730 MatMultTransposeAdd_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*10*/ NULL, 2735 NULL, 2736 NULL, 2737 MatSOR_MPIAIJ, 2738 MatTranspose_MPIAIJ, 2739 /*15*/ MatGetInfo_MPIAIJ, 2740 MatEqual_MPIAIJ, 2741 MatGetDiagonal_MPIAIJ, 2742 MatDiagonalScale_MPIAIJ, 2743 MatNorm_MPIAIJ, 2744 /*20*/ MatAssemblyBegin_MPIAIJ, 2745 MatAssemblyEnd_MPIAIJ, 2746 MatSetOption_MPIAIJ, 2747 MatZeroEntries_MPIAIJ, 2748 /*24*/ MatZeroRows_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 NULL, 2753 /*29*/ MatSetUp_MPI_Hash, 2754 NULL, 2755 NULL, 2756 MatGetDiagonalBlock_MPIAIJ, 2757 NULL, 2758 /*34*/ MatDuplicate_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 /*39*/ MatAXPY_MPIAIJ, 2764 MatCreateSubMatrices_MPIAIJ, 2765 MatIncreaseOverlap_MPIAIJ, 2766 MatGetValues_MPIAIJ, 2767 MatCopy_MPIAIJ, 2768 /*44*/ MatGetRowMax_MPIAIJ, 2769 MatScale_MPIAIJ, 2770 MatShift_MPIAIJ, 2771 MatDiagonalSet_MPIAIJ, 2772 MatZeroRowsColumns_MPIAIJ, 2773 /*49*/ MatSetRandom_MPIAIJ, 2774 MatGetRowIJ_MPIAIJ, 2775 MatRestoreRowIJ_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*54*/ MatFDColoringCreate_MPIXAIJ, 2779 NULL, 2780 MatSetUnfactored_MPIAIJ, 2781 MatPermute_MPIAIJ, 2782 NULL, 2783 /*59*/ MatCreateSubMatrix_MPIAIJ, 2784 MatDestroy_MPIAIJ, 2785 MatView_MPIAIJ, 2786 NULL, 2787 NULL, 2788 /*64*/ NULL, 2789 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2790 NULL, 2791 NULL, 2792 NULL, 2793 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2794 MatGetRowMinAbs_MPIAIJ, 2795 NULL, 2796 NULL, 2797 NULL, 2798 NULL, 2799 /*75*/ MatFDColoringApply_AIJ, 2800 MatSetFromOptions_MPIAIJ, 2801 NULL, 2802 NULL, 2803 MatFindZeroDiagonals_MPIAIJ, 2804 /*80*/ NULL, 2805 NULL, 2806 NULL, 2807 /*83*/ MatLoad_MPIAIJ, 2808 NULL, 2809 NULL, 2810 NULL, 2811 NULL, 2812 NULL, 2813 /*89*/ NULL, 2814 NULL, 2815 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2816 NULL, 2817 NULL, 2818 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2819 NULL, 2820 NULL, 2821 
NULL, 2822 MatBindToCPU_MPIAIJ, 2823 /*99*/ MatProductSetFromOptions_MPIAIJ, 2824 NULL, 2825 NULL, 2826 MatConjugate_MPIAIJ, 2827 NULL, 2828 /*104*/ MatSetValuesRow_MPIAIJ, 2829 MatRealPart_MPIAIJ, 2830 MatImaginaryPart_MPIAIJ, 2831 NULL, 2832 NULL, 2833 /*109*/ NULL, 2834 NULL, 2835 MatGetRowMin_MPIAIJ, 2836 NULL, 2837 MatMissingDiagonal_MPIAIJ, 2838 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2839 NULL, 2840 MatGetGhosts_MPIAIJ, 2841 NULL, 2842 NULL, 2843 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2844 NULL, 2845 NULL, 2846 NULL, 2847 MatGetMultiProcBlock_MPIAIJ, 2848 /*124*/ MatFindNonzeroRows_MPIAIJ, 2849 MatGetColumnReductions_MPIAIJ, 2850 MatInvertBlockDiagonal_MPIAIJ, 2851 MatInvertVariableBlockDiagonal_MPIAIJ, 2852 MatCreateSubMatricesMPI_MPIAIJ, 2853 /*129*/ NULL, 2854 NULL, 2855 NULL, 2856 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2857 NULL, 2858 /*134*/ NULL, 2859 NULL, 2860 NULL, 2861 NULL, 2862 NULL, 2863 /*139*/ MatSetBlockSizes_MPIAIJ, 2864 NULL, 2865 NULL, 2866 MatFDColoringSetUp_MPIXAIJ, 2867 MatFindOffBlockDiagonalEntries_MPIAIJ, 2868 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2869 /*145*/ NULL, 2870 NULL, 2871 NULL, 2872 MatCreateGraph_Simple_AIJ, 2873 NULL, 2874 /*150*/ NULL, 2875 MatEliminateZeros_MPIAIJ, 2876 MatGetRowSumAbs_MPIAIJ, 2877 NULL, 2878 NULL, 2879 /*155*/ NULL, 2880 MatCopyHashToXAIJ_MPI_Hash}; 2881 2882 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2883 { 2884 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2885 2886 PetscFunctionBegin; 2887 PetscCall(MatStoreValues(aij->A)); 2888 PetscCall(MatStoreValues(aij->B)); 2889 PetscFunctionReturn(PETSC_SUCCESS); 2890 } 2891 2892 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2893 { 2894 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2895 2896 PetscFunctionBegin; 2897 PetscCall(MatRetrieveValues(aij->A)); 2898 PetscCall(MatRetrieveValues(aij->B)); 2899 PetscFunctionReturn(PETSC_SUCCESS); 2900 } 2901 2902 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2903 { 2904 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2905 PetscMPIInt size; 2906 2907 PetscFunctionBegin; 2908 if (B->hash_active) { 2909 B->ops[0] = b->cops; 2910 B->hash_active = PETSC_FALSE; 2911 } 2912 PetscCall(PetscLayoutSetUp(B->rmap)); 2913 PetscCall(PetscLayoutSetUp(B->cmap)); 2914 2915 #if defined(PETSC_USE_CTABLE) 2916 PetscCall(PetscHMapIDestroy(&b->colmap)); 2917 #else 2918 PetscCall(PetscFree(b->colmap)); 2919 #endif 2920 PetscCall(PetscFree(b->garray)); 2921 PetscCall(VecDestroy(&b->lvec)); 2922 PetscCall(VecScatterDestroy(&b->Mvctx)); 2923 2924 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2925 2926 MatSeqXAIJGetOptions_Private(b->B); 2927 PetscCall(MatDestroy(&b->B)); 2928 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2929 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2930 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2931 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2932 MatSeqXAIJRestoreOptions_Private(b->B); 2933 2934 MatSeqXAIJGetOptions_Private(b->A); 2935 PetscCall(MatDestroy(&b->A)); 2936 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2937 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2938 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2939 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2940 MatSeqXAIJRestoreOptions_Private(b->A); 2941 2942 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2943 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2944 B->preallocated = PETSC_TRUE; 2945 B->was_assembled = PETSC_FALSE; 2946 B->assembled = PETSC_FALSE; 2947 PetscFunctionReturn(PETSC_SUCCESS); 2948 } 2949 2950 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2951 { 2952 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2953 2954 PetscFunctionBegin; 2955 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2956 PetscCall(PetscLayoutSetUp(B->rmap)); 2957 PetscCall(PetscLayoutSetUp(B->cmap)); 2958 if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2959 else { 2960 #if defined(PETSC_USE_CTABLE) 2961 PetscCall(PetscHMapIDestroy(&b->colmap)); 2962 #else 2963 PetscCall(PetscFree(b->colmap)); 2964 #endif 2965 PetscCall(PetscFree(b->garray)); 2966 PetscCall(VecDestroy(&b->lvec)); 2967 } 2968 PetscCall(VecScatterDestroy(&b->Mvctx)); 2969 2970 PetscCall(MatResetPreallocation(b->A)); 2971 PetscCall(MatResetPreallocation(b->B)); 2972 B->preallocated = PETSC_TRUE; 2973 B->was_assembled = PETSC_FALSE; 2974 B->assembled = PETSC_FALSE; 2975 PetscFunctionReturn(PETSC_SUCCESS); 2976 } 2977 2978 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2979 { 2980 Mat mat; 2981 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2982 2983 PetscFunctionBegin; 2984 *newmat = NULL; 2985 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2986 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2987 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2988 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2989 a = (Mat_MPIAIJ *)mat->data; 2990 2991 mat->factortype = matin->factortype; 2992 mat->assembled = matin->assembled; 2993 mat->insertmode = NOT_SET_VALUES; 2994 2995 a->size = oldmat->size; 2996 a->rank = oldmat->rank; 2997 a->donotstash = oldmat->donotstash; 2998 a->roworiented = oldmat->roworiented; 2999 a->rowindices = NULL; 3000 a->rowvalues = NULL; 3001 a->getrowactive = PETSC_FALSE; 3002 3003 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3004 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3005 if (matin->hash_active) { 3006 PetscCall(MatSetUp(mat)); 3007 } else { 3008 mat->preallocated = matin->preallocated; 3009 if (oldmat->colmap) { 3010 #if defined(PETSC_USE_CTABLE) 3011 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3012 #else 3013 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3014 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3015 #endif 3016 } else a->colmap = NULL; 3017 if (oldmat->garray) { 3018 PetscInt len; 3019 len = oldmat->B->cmap->n; 3020 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3021 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3022 } else a->garray = NULL; 3023 3024 /* It may happen MatDuplicate is called with a non-assembled matrix 3025 In fact, MatDuplicate only requires the matrix to 
be preallocated 3026 This may happen inside a DMCreateMatrix_Shell */ 3027 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3028 if (oldmat->Mvctx) { 3029 a->Mvctx = oldmat->Mvctx; 3030 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3031 } 3032 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3033 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3034 } 3035 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3036 *newmat = mat; 3037 PetscFunctionReturn(PETSC_SUCCESS); 3038 } 3039 3040 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3041 { 3042 PetscBool isbinary, ishdf5; 3043 3044 PetscFunctionBegin; 3045 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3046 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3047 /* force binary viewer to load .info file if it has not yet done so */ 3048 PetscCall(PetscViewerSetUp(viewer)); 3049 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3050 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3051 if (isbinary) { 3052 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3053 } else if (ishdf5) { 3054 #if defined(PETSC_HAVE_HDF5) 3055 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3056 #else 3057 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3058 #endif 3059 } else { 3060 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3061 } 3062 PetscFunctionReturn(PETSC_SUCCESS); 3063 } 3064 3065 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3066 { 3067 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3068 PetscInt *rowidxs, *colidxs; 3069 PetscScalar *matvals; 3070 3071 PetscFunctionBegin; 3072 PetscCall(PetscViewerSetUp(viewer)); 3073 3074 /* read in matrix header */ 3075 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3076 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3077 M = header[1]; 3078 N = header[2]; 3079 nz = header[3]; 3080 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3081 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3082 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3083 3084 /* set block sizes from the viewer's .info file */ 3085 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3086 /* set global sizes if not set already */ 3087 if (mat->rmap->N < 0) mat->rmap->N = M; 3088 if (mat->cmap->N < 0) mat->cmap->N = N; 3089 PetscCall(PetscLayoutSetUp(mat->rmap)); 3090 PetscCall(PetscLayoutSetUp(mat->cmap)); 3091 3092 /* check if the matrix sizes are correct */ 3093 PetscCall(MatGetSize(mat, &rows, &cols)); 3094 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3095 3096 /* read in row lengths and build row indices */ 3097 PetscCall(MatGetLocalSize(mat, 
&m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_INT_MAX) {
    PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt gisstride = 0;

  PetscFunctionBegin;
  /* check if we are grabbing all columns */
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) gisstride = 1;
  }

  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
+ mat - matrix
. isrow - parallel row index set; its local indices are a subset of the local rows of `mat`,
          i.e., mat->rstart <= isrow[i] < mat->rend
- iscol - parallel column index set; its local indices are a subset of the local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

  Output Parameters:
+ isrow_d - sequential row index set for retrieving mat->A
. iscol_d - sequential column index set for retrieving mat->A
.
iscol_o - sequential column index set for retrieving mat->B 3170 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3171 */ 3172 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3173 { 3174 Vec x, cmap; 3175 const PetscInt *is_idx; 3176 PetscScalar *xarray, *cmaparray; 3177 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3178 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3179 Mat B = a->B; 3180 Vec lvec = a->lvec, lcmap; 3181 PetscInt i, cstart, cend, Bn = B->cmap->N; 3182 MPI_Comm comm; 3183 VecScatter Mvctx = a->Mvctx; 3184 3185 PetscFunctionBegin; 3186 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3187 PetscCall(ISGetLocalSize(iscol, &ncols)); 3188 3189 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3190 PetscCall(MatCreateVecs(mat, &x, NULL)); 3191 PetscCall(VecSet(x, -1.0)); 3192 PetscCall(VecDuplicate(x, &cmap)); 3193 PetscCall(VecSet(cmap, -1.0)); 3194 3195 /* Get start indices */ 3196 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3197 isstart -= ncols; 3198 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3199 3200 PetscCall(ISGetIndices(iscol, &is_idx)); 3201 PetscCall(VecGetArray(x, &xarray)); 3202 PetscCall(VecGetArray(cmap, &cmaparray)); 3203 PetscCall(PetscMalloc1(ncols, &idx)); 3204 for (i = 0; i < ncols; i++) { 3205 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3206 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3207 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3208 } 3209 PetscCall(VecRestoreArray(x, &xarray)); 3210 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3211 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3212 3213 /* Get iscol_d */ 3214 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3215 PetscCall(ISGetBlockSize(iscol, &i)); 3216 PetscCall(ISSetBlockSize(*iscol_d, i)); 3217 3218 /* Get isrow_d */ 3219 PetscCall(ISGetLocalSize(isrow, &m)); 3220 rstart = mat->rmap->rstart; 3221 PetscCall(PetscMalloc1(m, &idx)); 3222 PetscCall(ISGetIndices(isrow, &is_idx)); 3223 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3224 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3225 3226 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3227 PetscCall(ISGetBlockSize(isrow, &i)); 3228 PetscCall(ISSetBlockSize(*isrow_d, i)); 3229 3230 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3231 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3232 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3233 3234 PetscCall(VecDuplicate(lvec, &lcmap)); 3235 3236 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3237 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3238 3239 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3240 /* off-process column indices */ 3241 count = 0; 3242 PetscCall(PetscMalloc1(Bn, &idx)); 3243 PetscCall(PetscMalloc1(Bn, &cmap1)); 3244 3245 PetscCall(VecGetArray(lvec, &xarray)); 3246 PetscCall(VecGetArray(lcmap, &cmaparray)); 3247 for (i = 0; i < Bn; i++) { 3248 if (PetscRealPart(xarray[i]) > -1.0) { 3249 idx[count] = i; /* local column index in off-diagonal part B */ 3250 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3251 count++; 
3252 } 3253 } 3254 PetscCall(VecRestoreArray(lvec, &xarray)); 3255 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3256 3257 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3258 /* cannot ensure iscol_o has same blocksize as iscol! */ 3259 3260 PetscCall(PetscFree(idx)); 3261 *garray = cmap1; 3262 3263 PetscCall(VecDestroy(&x)); 3264 PetscCall(VecDestroy(&cmap)); 3265 PetscCall(VecDestroy(&lcmap)); 3266 PetscFunctionReturn(PETSC_SUCCESS); 3267 } 3268 3269 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3270 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3271 { 3272 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3273 Mat M = NULL; 3274 MPI_Comm comm; 3275 IS iscol_d, isrow_d, iscol_o; 3276 Mat Asub = NULL, Bsub = NULL; 3277 PetscInt n, count, M_size, N_size; 3278 3279 PetscFunctionBegin; 3280 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3281 3282 if (call == MAT_REUSE_MATRIX) { 3283 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3284 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3285 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3286 3287 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3288 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3289 3290 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3291 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3292 3293 /* Update diagonal and off-diagonal portions of submat */ 3294 asub = (Mat_MPIAIJ *)(*submat)->data; 3295 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3296 PetscCall(ISGetLocalSize(iscol_o, &n)); 3297 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3298 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3299 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3300 3301 } else { /* call == MAT_INITIAL_MATRIX) */ 3302 PetscInt *garray, *garray_compact; 3303 PetscInt BsubN; 3304 3305 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3306 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3307 3308 /* Create local submatrices Asub and Bsub */ 3309 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3310 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3311 3312 // Compact garray so its not of size Bn 3313 PetscCall(ISGetSize(iscol_o, &count)); 3314 PetscCall(PetscMalloc1(count, &garray_compact)); 3315 PetscCall(PetscArraycpy(garray_compact, garray, count)); 3316 3317 /* Create submatrix M */ 3318 PetscCall(ISGetSize(isrow, &M_size)); 3319 PetscCall(ISGetSize(iscol, &N_size)); 3320 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M)); 3321 3322 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3323 asub = (Mat_MPIAIJ *)M->data; 3324 3325 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3326 n = asub->B->cmap->N; 3327 if (BsubN > n) { 3328 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3329 const PetscInt *idx; 3330 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3331 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3332 3333 PetscCall(PetscMalloc1(n, &idx_new)); 3334 j = 0; 3335 PetscCall(ISGetIndices(iscol_o, &idx)); 3336 for (i = 0; i < n; i++) { 3337 if (j >= BsubN) break; 3338 while (subgarray[i] > garray[j]) j++; 3339 3340 if (subgarray[i] == garray[j]) { 3341 idx_new[i] = idx[j++]; 3342 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3343 } 3344 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3345 3346 PetscCall(ISDestroy(&iscol_o)); 3347 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3348 3349 } else if (BsubN < n) { 3350 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3351 } 3352 3353 PetscCall(PetscFree(garray)); 3354 *submat = M; 3355 3356 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3357 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3358 PetscCall(ISDestroy(&isrow_d)); 3359 3360 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3361 PetscCall(ISDestroy(&iscol_d)); 3362 3363 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3364 PetscCall(ISDestroy(&iscol_o)); 3365 } 3366 PetscFunctionReturn(PETSC_SUCCESS); 3367 } 3368 3369 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3370 { 3371 IS iscol_local = NULL, isrow_d; 3372 PetscInt csize; 3373 PetscInt n, i, j, start, end; 3374 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3375 MPI_Comm comm; 3376 3377 PetscFunctionBegin; 3378 /* If isrow has same processor distribution as mat, 3379 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3380 if (call == MAT_REUSE_MATRIX) { 3381 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3382 if (isrow_d) { 3383 sameRowDist = PETSC_TRUE; 3384 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3385 } else { 3386 
PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3387 if (iscol_local) { 3388 sameRowDist = PETSC_TRUE; 3389 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3390 } 3391 } 3392 } else { 3393 /* Check if isrow has same processor distribution as mat */ 3394 sameDist[0] = PETSC_FALSE; 3395 PetscCall(ISGetLocalSize(isrow, &n)); 3396 if (!n) { 3397 sameDist[0] = PETSC_TRUE; 3398 } else { 3399 PetscCall(ISGetMinMax(isrow, &i, &j)); 3400 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3401 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3402 } 3403 3404 /* Check if iscol has same processor distribution as mat */ 3405 sameDist[1] = PETSC_FALSE; 3406 PetscCall(ISGetLocalSize(iscol, &n)); 3407 if (!n) { 3408 sameDist[1] = PETSC_TRUE; 3409 } else { 3410 PetscCall(ISGetMinMax(iscol, &i, &j)); 3411 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3412 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3413 } 3414 3415 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3416 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3417 sameRowDist = tsameDist[0]; 3418 } 3419 3420 if (sameRowDist) { 3421 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3422 /* isrow and iscol have same processor distribution as mat */ 3423 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3424 PetscFunctionReturn(PETSC_SUCCESS); 3425 } else { /* sameRowDist */ 3426 /* isrow has same processor distribution as mat */ 3427 if (call == MAT_INITIAL_MATRIX) { 3428 PetscBool sorted; 3429 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3430 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3431 PetscCall(ISGetSize(iscol, &i)); 3432 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3433 3434 PetscCall(ISSorted(iscol_local, &sorted)); 3435 if (sorted) { 3436 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3437 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3438 PetscFunctionReturn(PETSC_SUCCESS); 3439 } 3440 } else { /* call == MAT_REUSE_MATRIX */ 3441 IS iscol_sub; 3442 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3443 if (iscol_sub) { 3444 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3445 PetscFunctionReturn(PETSC_SUCCESS); 3446 } 3447 } 3448 } 3449 } 3450 3451 /* General case: iscol -> iscol_local which has global size of iscol */ 3452 if (call == MAT_REUSE_MATRIX) { 3453 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3454 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3455 } else { 3456 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3457 } 3458 3459 PetscCall(ISGetLocalSize(iscol, &csize)); 3460 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3461 3462 if (call == MAT_INITIAL_MATRIX) { 3463 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3464 PetscCall(ISDestroy(&iscol_local)); 3465 } 3466 PetscFunctionReturn(PETSC_SUCCESS); 3467 } 3468 3469 /*@C 3470 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using 
`MATSEQAIJ` matrices that contain the "diagonal" and "off-diagonal" parts of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. M      - the global row size
. N      - the global column size
. A      - "diagonal" portion of matrix
. B      - the "off-diagonal" portion of the matrix; if `garray` is `NULL`, `B` uses global column ids and has `N` columns,
           while if `garray` is not `NULL`, `B` uses local column ids and has as many columns as entries in `garray`
- garray - either `NULL` or the global indices of the columns of `B`

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

  `A` and `B` become part of the output `mat`. The user cannot use `A` and `B` anymore.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
{
  PetscInt m, n;
  MatType  mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);

  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));
  PetscCall(MatSetMPIAIJWithSplitSeqAIJ(*mat, A, B, garray));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetMPIAIJWithSplitSeqAIJ - Set the diag and offdiag matrices of a `MATMPIAIJ` matrix.
  It is similar to `MatCreateMPIAIJWithSplitArrays()`, but this routine allows passing in
  `B` with local indices and the correct size, along with the accompanying
  `garray`, hence skipping compactification.

  Collective

  Input Parameters:
+ mat    - the `MATMPIAIJ` matrix, which should have its type and layout set, but should not have its diag and offdiag matrices set yet
. A      - the diag matrix using local col ids
. B      - the offdiag matrix; if `garray` is `NULL`, `B` uses global col ids and has `N` columns,
           while if `garray` is not `NULL`, `B` uses local col ids and has as many columns as entries in `garray`
- garray - either `NULL` or the global indices of the columns of `B`

  Output Parameter:
. mat - the updated `MATMPIAIJ` matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

  `A` and `B` become part of the output `mat`. The user cannot use `A` and `B` anymore.
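
  Example Usage:
  A minimal sketch of the intended call sequence (mirroring what `MatCreateMPIAIJWithSeqAIJ()` does; `Adiag`, `Boff`, and
  `garray` are placeholder names, and `PetscCall()` error checking is omitted):
.vb
     Mat mat;

     MatCreate(comm, &mat);
     MatSetSizes(mat, m, n, M, N);
     MatSetType(mat, MATMPIAIJ);
     PetscLayoutSetUp(mat->rmap);
     PetscLayoutSetUp(mat->cmap);
     MatSetMPIAIJWithSplitSeqAIJ(mat, Adiag, Boff, garray); // mat takes ownership of Adiag and Boff
.ve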
3542 3543 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3544 */ 3545 PETSC_INTERN PetscErrorCode MatSetMPIAIJWithSplitSeqAIJ(Mat mat, Mat A, Mat B, PetscInt *garray) 3546 { 3547 PetscFunctionBegin; 3548 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 3549 PetscInt m, n, M, N, Am, An, Bm, Bn; 3550 3551 PetscCall(MatGetSize(mat, &M, &N)); 3552 PetscCall(MatGetLocalSize(mat, &m, &n)); 3553 PetscCall(MatGetLocalSize(A, &Am, &An)); 3554 PetscCall(MatGetLocalSize(B, &Bm, &Bn)); 3555 3556 PetscCheck(m == Am && m == Bm, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of rows do not match"); 3557 PetscCheck(n == An, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of columns do not match"); 3558 PetscCheck(!mpiaij->A && !mpiaij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "A, B of the MPIAIJ matrix are not empty"); 3559 mpiaij->A = A; 3560 mpiaij->B = B; 3561 mpiaij->garray = garray; 3562 3563 mat->preallocated = PETSC_TRUE; 3564 mat->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */ 3565 3566 PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3567 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3568 /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and 3569 also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced 3570 */ 3571 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3572 PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3573 PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3574 PetscFunctionReturn(PETSC_SUCCESS); 3575 } 3576 3577 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3578 3579 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3580 { 3581 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3582 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3583 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3584 Mat M, Msub, B = a->B; 3585 MatScalar *aa; 3586 Mat_SeqAIJ *aij; 3587 PetscInt *garray = a->garray, *colsub, Ncols; 3588 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3589 IS iscol_sub, iscmap; 3590 const PetscInt *is_idx, *cmap; 3591 PetscBool allcolumns = PETSC_FALSE; 3592 MPI_Comm comm; 3593 3594 PetscFunctionBegin; 3595 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3596 if (call == MAT_REUSE_MATRIX) { 3597 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3598 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3599 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3600 3601 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3602 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3603 3604 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3605 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3606 3607 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3608 3609 } else { /* call == MAT_INITIAL_MATRIX) */ 3610 PetscBool flg; 3611 3612 PetscCall(ISGetLocalSize(iscol, 
&n)); 3613 PetscCall(ISGetSize(iscol, &Ncols)); 3614 3615 /* (1) iscol -> nonscalable iscol_local */ 3616 /* Check for special case: each processor gets entire matrix columns */ 3617 PetscCall(ISIdentity(iscol_local, &flg)); 3618 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3619 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3620 if (allcolumns) { 3621 iscol_sub = iscol_local; 3622 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3623 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3624 3625 } else { 3626 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3627 PetscInt *idx, *cmap1, k; 3628 PetscCall(PetscMalloc1(Ncols, &idx)); 3629 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3630 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3631 count = 0; 3632 k = 0; 3633 for (i = 0; i < Ncols; i++) { 3634 j = is_idx[i]; 3635 if (j >= cstart && j < cend) { 3636 /* diagonal part of mat */ 3637 idx[count] = j; 3638 cmap1[count++] = i; /* column index in submat */ 3639 } else if (Bn) { 3640 /* off-diagonal part of mat */ 3641 if (j == garray[k]) { 3642 idx[count] = j; 3643 cmap1[count++] = i; /* column index in submat */ 3644 } else if (j > garray[k]) { 3645 while (j > garray[k] && k < Bn - 1) k++; 3646 if (j == garray[k]) { 3647 idx[count] = j; 3648 cmap1[count++] = i; /* column index in submat */ 3649 } 3650 } 3651 } 3652 } 3653 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3654 3655 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3656 PetscCall(ISGetBlockSize(iscol, &cbs)); 3657 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3658 3659 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3660 } 3661 3662 /* (3) Create sequential Msub */ 3663 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3664 } 3665 3666 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3667 aij = (Mat_SeqAIJ *)Msub->data; 3668 ii = aij->i; 3669 PetscCall(ISGetIndices(iscmap, &cmap)); 3670 3671 /* 3672 m - number of local rows 3673 Ncols - number of columns (same on all processors) 3674 rstart - first row in new global matrix generated 3675 */ 3676 PetscCall(MatGetSize(Msub, &m, NULL)); 3677 3678 if (call == MAT_INITIAL_MATRIX) { 3679 /* (4) Create parallel newmat */ 3680 PetscMPIInt rank, size; 3681 PetscInt csize; 3682 3683 PetscCallMPI(MPI_Comm_size(comm, &size)); 3684 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3685 3686 /* 3687 Determine the number of non-zeros in the diagonal and off-diagonal 3688 portions of the matrix in order to do correct preallocation 3689 */ 3690 3691 /* first get start and end of "diagonal" columns */ 3692 PetscCall(ISGetLocalSize(iscol, &csize)); 3693 if (csize == PETSC_DECIDE) { 3694 PetscCall(ISGetSize(isrow, &mglobal)); 3695 if (mglobal == Ncols) { /* square matrix */ 3696 nlocal = m; 3697 } else { 3698 nlocal = Ncols / size + ((Ncols % size) > rank); 3699 } 3700 } else { 3701 nlocal = csize; 3702 } 3703 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3704 rstart = rend - nlocal; 3705 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3706 3707 /* next, compute all the lengths */ 3708 jj = aij->j; 3709 
PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3710 olens = dlens + m; 3711 for (i = 0; i < m; i++) { 3712 jend = ii[i + 1] - ii[i]; 3713 olen = 0; 3714 dlen = 0; 3715 for (j = 0; j < jend; j++) { 3716 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3717 else dlen++; 3718 jj++; 3719 } 3720 olens[i] = olen; 3721 dlens[i] = dlen; 3722 } 3723 3724 PetscCall(ISGetBlockSize(isrow, &bs)); 3725 PetscCall(ISGetBlockSize(iscol, &cbs)); 3726 3727 PetscCall(MatCreate(comm, &M)); 3728 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3729 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3730 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3731 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3732 PetscCall(PetscFree(dlens)); 3733 3734 } else { /* call == MAT_REUSE_MATRIX */ 3735 M = *newmat; 3736 PetscCall(MatGetLocalSize(M, &i, NULL)); 3737 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3738 PetscCall(MatZeroEntries(M)); 3739 /* 3740 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3741 rather than the slower MatSetValues(). 3742 */ 3743 M->was_assembled = PETSC_TRUE; 3744 M->assembled = PETSC_FALSE; 3745 } 3746 3747 /* (5) Set values of Msub to *newmat */ 3748 PetscCall(PetscMalloc1(count, &colsub)); 3749 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3750 3751 jj = aij->j; 3752 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3753 for (i = 0; i < m; i++) { 3754 row = rstart + i; 3755 nz = ii[i + 1] - ii[i]; 3756 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3757 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3758 jj += nz; 3759 aa += nz; 3760 } 3761 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3762 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3763 3764 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3765 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3766 3767 PetscCall(PetscFree(colsub)); 3768 3769 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3770 if (call == MAT_INITIAL_MATRIX) { 3771 *newmat = M; 3772 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3773 PetscCall(MatDestroy(&Msub)); 3774 3775 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3776 PetscCall(ISDestroy(&iscol_sub)); 3777 3778 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3779 PetscCall(ISDestroy(&iscmap)); 3780 3781 if (iscol_local) { 3782 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3783 PetscCall(ISDestroy(&iscol_local)); 3784 } 3785 } 3786 PetscFunctionReturn(PETSC_SUCCESS); 3787 } 3788 3789 /* 3790 Not great since it makes two copies of the submatrix, first an SeqAIJ 3791 in local and then by concatenating the local matrices the end result. 3792 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3793 3794 This requires a sequential iscol with all indices. 
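  (In other words: each process first forms a sequential `MATSEQAIJ` copy of its part of the submatrix, and the parallel
  result is then assembled by concatenating those local pieces.)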
3795 */ 3796 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3797 { 3798 PetscMPIInt rank, size; 3799 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3800 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3801 Mat M, Mreuse; 3802 MatScalar *aa, *vwork; 3803 MPI_Comm comm; 3804 Mat_SeqAIJ *aij; 3805 PetscBool colflag, allcolumns = PETSC_FALSE; 3806 3807 PetscFunctionBegin; 3808 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3809 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3810 PetscCallMPI(MPI_Comm_size(comm, &size)); 3811 3812 /* Check for special case: each processor gets entire matrix columns */ 3813 PetscCall(ISIdentity(iscol, &colflag)); 3814 PetscCall(ISGetLocalSize(iscol, &n)); 3815 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3816 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3817 3818 if (call == MAT_REUSE_MATRIX) { 3819 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3820 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3821 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3822 } else { 3823 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3824 } 3825 3826 /* 3827 m - number of local rows 3828 n - number of columns (same on all processors) 3829 rstart - first row in new global matrix generated 3830 */ 3831 PetscCall(MatGetSize(Mreuse, &m, &n)); 3832 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3833 if (call == MAT_INITIAL_MATRIX) { 3834 aij = (Mat_SeqAIJ *)Mreuse->data; 3835 ii = aij->i; 3836 jj = aij->j; 3837 3838 /* 3839 Determine the number of non-zeros in the diagonal and off-diagonal 3840 portions of the matrix in order to do correct preallocation 3841 */ 3842 3843 /* first get start and end of "diagonal" columns */ 3844 if (csize == PETSC_DECIDE) { 3845 PetscCall(ISGetSize(isrow, &mglobal)); 3846 if (mglobal == n) { /* square matrix */ 3847 nlocal = m; 3848 } else { 3849 nlocal = n / size + ((n % size) > rank); 3850 } 3851 } else { 3852 nlocal = csize; 3853 } 3854 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3855 rstart = rend - nlocal; 3856 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3857 3858 /* next, compute all the lengths */ 3859 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3860 olens = dlens + m; 3861 for (i = 0; i < m; i++) { 3862 jend = ii[i + 1] - ii[i]; 3863 olen = 0; 3864 dlen = 0; 3865 for (j = 0; j < jend; j++) { 3866 if (*jj < rstart || *jj >= rend) olen++; 3867 else dlen++; 3868 jj++; 3869 } 3870 olens[i] = olen; 3871 dlens[i] = dlen; 3872 } 3873 PetscCall(MatCreate(comm, &M)); 3874 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3875 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3876 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3877 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3878 PetscCall(PetscFree(dlens)); 3879 } else { 3880 PetscInt ml, nl; 3881 3882 M = *newmat; 3883 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3884 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3885 PetscCall(MatZeroEntries(M)); 3886 /* 3887 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3888 rather than the slower MatSetValues(). 3889 */ 3890 M->was_assembled = PETSC_TRUE; 3891 M->assembled = PETSC_FALSE; 3892 } 3893 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3894 aij = (Mat_SeqAIJ *)Mreuse->data; 3895 ii = aij->i; 3896 jj = aij->j; 3897 3898 /* trigger copy to CPU if needed */ 3899 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3900 for (i = 0; i < m; i++) { 3901 row = rstart + i; 3902 nz = ii[i + 1] - ii[i]; 3903 cwork = jj; 3904 jj = PetscSafePointerPlusOffset(jj, nz); 3905 vwork = aa; 3906 aa = PetscSafePointerPlusOffset(aa, nz); 3907 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3908 } 3909 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3910 3911 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3912 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3913 *newmat = M; 3914 3915 /* save submatrix used in processor for next request */ 3916 if (call == MAT_INITIAL_MATRIX) { 3917 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3918 PetscCall(MatDestroy(&Mreuse)); 3919 } 3920 PetscFunctionReturn(PETSC_SUCCESS); 3921 } 3922 3923 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3924 { 3925 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3926 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3927 const PetscInt *JJ; 3928 PetscBool nooffprocentries; 3929 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3930 3931 PetscFunctionBegin; 3932 PetscCall(PetscLayoutSetUp(B->rmap)); 3933 PetscCall(PetscLayoutSetUp(B->cmap)); 3934 m = B->rmap->n; 3935 cstart = B->cmap->rstart; 3936 cend = B->cmap->rend; 3937 rstart = B->rmap->rstart; 3938 irstart = Ii[0]; 3939 3940 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3941 3942 if (PetscDefined(USE_DEBUG)) { 3943 for (i = 0; i < m; i++) { 3944 nnz = Ii[i + 1] - Ii[i]; 3945 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3946 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3947 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3948 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3949 } 3950 } 3951 3952 for (i = 0; i < m; i++) { 3953 nnz = Ii[i + 1] - Ii[i]; 3954 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3955 nnz_max = PetscMax(nnz_max, nnz); 3956 d = 0; 3957 for (j = 0; j < nnz; j++) { 3958 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3959 } 3960 d_nnz[i] = d; 3961 o_nnz[i] = nnz - d; 3962 } 3963 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3964 PetscCall(PetscFree2(d_nnz, o_nnz)); 3965 3966 for (i = 0; i < m; i++) { 3967 ii = i + rstart; 3968 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3969 } 3970 nooffprocentries = B->nooffprocentries; 3971 B->nooffprocentries = PETSC_TRUE; 3972 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3973 
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and the entries of `i` are offsets into the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call,
  but only if the column indices in `j` are sorted; if you will not use `MatUpdateMPIAIJWithArrays()`,
  the column indices in `j` do not need to be sorted.

  The format which is used for the sparse matrix input is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve
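
  For instance, process 0 above could be set up as follows (a sketch only, not taken from the PETSc examples;
  `PetscCall()` error checking omitted):
.vb
     Mat         A;
     PetscInt    i[] = {0, 1, 3};
     PetscInt    j[] = {0, 0, 2};
     PetscScalar v[] = {1, 2, 3};

     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetSizes(A, 2, PETSC_DECIDE, 3, 3);    // process 0 owns the first two rows
     MatSetType(A, MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(A, i, j, v); // copies i, j, v and assembles A
.ve
  Process 1 would pass its own slice, i = {0,3}, j = {0,1,2}, v = {4,5,6}, with local row count 1.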
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B     - the matrix
. d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e., 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e., 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
  The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence the preallocation is perfect.
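
  In code, the `d_nnz`/`o_nnz` variant on proc0 would look like this (a sketch only, with `PetscCall()` error checking omitted):
.vb
     Mat      B;
     PetscInt d_nnz[] = {2, 2, 2}, o_nnz[] = {2, 2, 2}; // the proc0 values above

     MatCreate(PETSC_COMM_WORLD, &B);
     MatSetSizes(B, 3, 3, 8, 8);                        // m = n = 3 on proc0
     MatSetType(B, MATMPIAIJ);
     MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz);  // d_nz, o_nz are ignored since the arrays are given
.ve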
  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored.

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0, m1, m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an m x n matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (m x N) constitutes the OFF-DIAGONAL portion.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows
  in standard CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
         calculated if `N` is given). For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
.
i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - global column indices
- a    - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and the entries of `i` are offsets into the local `j` array.

  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`,
  but only if the column indices in `j` are sorted; if you will not use `MatUpdateMPIAIJWithArray()`,
  the column indices in `j` do not need to be sorted.

  The format which is used for the sparse matrix input is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows
  in standard CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
  from `MatCreateMPIAIJWithArrays()`.

  Deprecated: Use `MatUpdateMPIAIJWithArray()`

  Collective

  Input Parameters:
+ mat - the matrix
. m   - number of local rows (Cannot be `PETSC_DECIDE`)
. n   - This value should be the same as the local size used in creating the
        x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
        calculated if `N` is given). For square matrices `n` is almost always `m`.
. M   - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
. N   - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
.
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4274 . J - column indices 4275 - v - matrix values 4276 4277 Level: deprecated 4278 4279 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4280 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4281 @*/ 4282 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4283 { 4284 PetscInt nnz, i; 4285 PetscBool nooffprocentries; 4286 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4287 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4288 PetscScalar *ad, *ao; 4289 PetscInt ldi, Iii, md; 4290 const PetscInt *Adi = Ad->i; 4291 PetscInt *ld = Aij->ld; 4292 4293 PetscFunctionBegin; 4294 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4295 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4296 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4297 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4298 4299 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4300 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4301 4302 for (i = 0; i < m; i++) { 4303 if (PetscDefined(USE_DEBUG)) { 4304 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4305 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4306 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4307 } 4308 } 4309 nnz = Ii[i + 1] - Ii[i]; 4310 Iii = Ii[i]; 4311 ldi = ld[i]; 4312 md = Adi[i + 1] - Adi[i]; 4313 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4314 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4315 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4316 ad += md; 4317 ao += nnz - md; 4318 } 4319 nooffprocentries = mat->nooffprocentries; 4320 mat->nooffprocentries = PETSC_TRUE; 4321 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4322 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4323 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4324 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4325 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4326 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4327 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4328 mat->nooffprocentries = nooffprocentries; 4329 PetscFunctionReturn(PETSC_SUCCESS); 4330 } 4331 4332 /*@ 4333 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4334 4335 Collective 4336 4337 Input Parameters: 4338 + mat - the matrix 4339 - v - matrix values, stored by row 4340 4341 Level: intermediate 4342 4343 Notes: 4344 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4345 4346 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4347 4348 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4349 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4350 @*/ 4351 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4352 { 4353 PetscInt nnz, i, m; 4354 PetscBool nooffprocentries; 4355 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4356 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4357 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4358 PetscScalar *ad, *ao; 4359 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4360 PetscInt ldi, Iii, md; 4361 PetscInt *ld = Aij->ld; 4362 4363 PetscFunctionBegin; 4364 m = mat->rmap->n; 4365 4366 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4367 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4368 Iii = 0; 4369 for (i = 0; i < m; i++) { 4370 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4371 ldi = ld[i]; 4372 md = Adi[i + 1] - Adi[i]; 4373 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4374 ad += md; 4375 if (ao) { 4376 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4377 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4378 ao += nnz - md; 4379 } 4380 Iii += nnz; 4381 } 4382 nooffprocentries = mat->nooffprocentries; 4383 mat->nooffprocentries = PETSC_TRUE; 4384 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4385 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4386 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4387 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4388 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4389 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4390 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4391 mat->nooffprocentries = nooffprocentries; 4392 PetscFunctionReturn(PETSC_SUCCESS); 4393 } 4394 4395 /*@ 4396 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4397 (the default parallel PETSc format). For good matrix assembly performance 4398 the user should preallocate the matrix storage by setting the parameters 4399 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4400 4401 Collective 4402 4403 Input Parameters: 4404 + comm - MPI communicator 4405 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4406 This value should be the same as the local size used in creating the 4407 y vector for the matrix-vector product y = Ax. 4408 . n - This value should be the same as the local size used in creating the 4409 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4410 calculated if N is given) For square matrices n is almost always m. 4411 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4412 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4413 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4414 (same value is used for all local rows) 4415 . d_nnz - array containing the number of nonzeros in the various rows of the 4416 DIAGONAL portion of the local submatrix (possibly different for each row) 4417 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4418 The size of this array is equal to the number of local rows, i.e 'm'. 4419 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4420 submatrix (same value is used for all local rows). 4421 - o_nnz - array containing the number of nonzeros in the various rows of the 4422 OFF-DIAGONAL portion of the local submatrix (possibly different for 4423 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4424 structure. The size of this array is equal to the number 4425 of local rows, i.e 'm'. 4426 4427 Output Parameter: 4428 . A - the matrix 4429 4430 Options Database Keys: 4431 + -mat_no_inode - Do not use inodes 4432 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4433 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4434 See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4435 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4436 4437 Level: intermediate 4438 4439 Notes: 4440 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4441 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4442 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4443 4444 If the *_nnz parameter is given then the *_nz parameter is ignored 4445 4446 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4447 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4448 storage requirements for this matrix. 4449 4450 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4451 processor then it must be used on all processors that share the object for 4452 that argument. 4453 4454 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4455 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4456 4457 The user MUST specify either the local or global matrix dimensions 4458 (possibly both). 4459 4460 The parallel matrix is partitioned across processors such that the 4461 first `m0` rows belong to process 0, the next `m1` rows belong to 4462 process 1, the next `m2` rows belong to process 2, etc., where 4463 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4464 values corresponding to an [m x N] submatrix. 4465 4466 The columns are logically partitioned with the first n0 columns belonging 4467 to the 0th partition, the next n1 columns belonging to the next 4468 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4469 4470 The DIAGONAL portion of the local submatrix on any given processor 4471 is the submatrix corresponding to the rows and columns m,n 4472 corresponding to the given processor, i.e., the diagonal submatrix on 4473 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 4474 etc. The remaining portion of the local submatrix, [m x (N-n)], 4475 constitutes the OFF-DIAGONAL portion. The example below better 4476 illustrates this concept. The two matrices, the DIAGONAL portion and 4477 the OFF-DIAGONAL portion, are each stored as `MATSEQAIJ` matrices.
4478 4479 For a square global matrix we define each processor's diagonal portion 4480 to be its local rows and the corresponding columns (a square submatrix); 4481 each processor's off-diagonal portion encompasses the remainder of the 4482 local matrix (a rectangular submatrix). 4483 4484 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4485 4486 When calling this routine with a single process communicator, a matrix of 4487 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4488 type of communicator, use the construction mechanism 4489 .vb 4490 MatCreate(..., &A); 4491 MatSetType(A, MATMPIAIJ); 4492 MatSetSizes(A, m, n, M, N); 4493 MatMPIAIJSetPreallocation(A, ...); 4494 .ve 4495 4496 By default, this format uses inodes (identical nodes) when possible. 4497 We search for consecutive rows with the same nonzero structure, thereby 4498 reusing matrix information to achieve increased efficiency. 4499 4500 Example Usage: 4501 Consider the following 8x8 matrix with 34 nonzero values that is 4502 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4503 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4504 as follows 4505 4506 .vb 4507 1 2 0 | 0 3 0 | 0 4 4508 Proc0 0 5 6 | 7 0 0 | 8 0 4509 9 0 10 | 11 0 0 | 12 0 4510 ------------------------------------- 4511 13 0 14 | 15 16 17 | 0 0 4512 Proc1 0 18 0 | 19 20 21 | 0 0 4513 0 0 0 | 22 23 0 | 24 0 4514 ------------------------------------- 4515 Proc2 25 26 27 | 0 0 28 | 29 0 4516 30 0 0 | 31 32 33 | 0 34 4517 .ve 4518 4519 This can be represented as a collection of submatrices as 4520 4521 .vb 4522 A B C 4523 D E F 4524 G H I 4525 .ve 4526 4527 where the submatrices A,B,C are owned by proc0, D,E,F are 4528 owned by proc1, and G,H,I are owned by proc2. 4529 4530 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4531 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4532 The 'M','N' parameters are 8,8, and have the same values on all procs. 4533 4534 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4535 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4536 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4537 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4538 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4539 matrix, and [DF] as another SeqAIJ matrix. 4540 4541 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4542 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4543 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4544 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4545 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4546 In this case, the values of `d_nz`,`o_nz` are 4547 .vb 4548 proc0 d_nz = 2, o_nz = 2 4549 proc1 d_nz = 3, o_nz = 2 4550 proc2 d_nz = 1, o_nz = 4 4551 .ve 4552 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4553 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4554 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4555 34 values. 4556 4557 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4558 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4559 In the above case, the values for `d_nnz` and `o_nnz` are 4560 .vb 4561 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4562 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4563 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4564 .ve 4565 Here the space allocated is the sum of all the above values, i.e., 34, and 4566 hence the preallocation is perfect. 4567 4568 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4569 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4570 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4571 @*/ 4572 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4573 { 4574 PetscMPIInt size; 4575 4576 PetscFunctionBegin; 4577 PetscCall(MatCreate(comm, A)); 4578 PetscCall(MatSetSizes(*A, m, n, M, N)); 4579 PetscCallMPI(MPI_Comm_size(comm, &size)); 4580 if (size > 1) { 4581 PetscCall(MatSetType(*A, MATMPIAIJ)); 4582 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4583 } else { 4584 PetscCall(MatSetType(*A, MATSEQAIJ)); 4585 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4586 } 4587 PetscFunctionReturn(PETSC_SUCCESS); 4588 } 4589 4590 /*MC 4591 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4592 4593 Synopsis: 4594 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4595 4596 Not Collective 4597 4598 Input Parameter: 4599 . A - the `MATMPIAIJ` matrix 4600 4601 Output Parameters: 4602 + Ad - the diagonal portion of the matrix 4603 . Ao - the off-diagonal portion of the matrix 4604 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4605 - ierr - error code 4606 4607 Level: advanced 4608 4609 Note: 4610 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4611 4612 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4613 M*/ 4614 4615 /*MC 4616 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4617 4618 Synopsis: 4619 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4620 4621 Not Collective 4622 4623 Input Parameters: 4624 + A - the `MATMPIAIJ` matrix 4625 . Ad - the diagonal portion of the matrix 4626 . Ao - the off-diagonal portion of the matrix 4627 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4628 - ierr - error code 4629 4630 Level: advanced 4631 4632 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4633 M*/ 4634 4635 /*@C 4636 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4637 4638 Not Collective 4639 4640 Input Parameter: 4641 . A - The `MATMPIAIJ` matrix 4642 4643 Output Parameters: 4644 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4645 .
Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4646 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4647 4648 Level: intermediate 4649 4650 Note: 4651 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4652 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is 4653 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array `colmap` maps these 4654 local column numbers to global column numbers in the original matrix. 4655 4656 Fortran Notes: 4657 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4658 4659 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4660 @*/ 4661 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4662 { 4663 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4664 PetscBool flg; 4665 4666 PetscFunctionBegin; 4667 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4668 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4669 if (Ad) *Ad = a->A; 4670 if (Ao) *Ao = a->B; 4671 if (colmap) *colmap = a->garray; 4672 PetscFunctionReturn(PETSC_SUCCESS); 4673 } 4674 4675 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4676 { 4677 PetscInt m, N, i, rstart, nnz, Ii; 4678 PetscInt *indx; 4679 PetscScalar *values; 4680 MatType rootType; 4681 4682 PetscFunctionBegin; 4683 PetscCall(MatGetSize(inmat, &m, &N)); 4684 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4685 PetscInt *dnz, *onz, sum, bs, cbs; 4686 4687 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4688 /* Check sum(n) = N */ 4689 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4690 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4691 4692 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4693 rstart -= m; 4694 4695 MatPreallocateBegin(comm, m, n, dnz, onz); 4696 for (i = 0; i < m; i++) { 4697 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4698 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4699 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4700 } 4701 4702 PetscCall(MatCreate(comm, outmat)); 4703 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4704 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4705 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4706 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4707 PetscCall(MatSetType(*outmat, rootType)); 4708 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4709 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4710 MatPreallocateEnd(dnz, onz); 4711 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4712 } 4713 4714 /* numeric phase */ 4715 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4716 for (i = 0; i < m; i++) { 4717 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4718 Ii = i + rstart; 4719 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4720
PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4721 } 4722 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4723 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4724 PetscFunctionReturn(PETSC_SUCCESS); 4725 } 4726 4727 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4728 { 4729 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4730 4731 PetscFunctionBegin; 4732 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4733 PetscCall(PetscFree(merge->id_r)); 4734 PetscCall(PetscFree(merge->len_s)); 4735 PetscCall(PetscFree(merge->len_r)); 4736 PetscCall(PetscFree(merge->bi)); 4737 PetscCall(PetscFree(merge->bj)); 4738 PetscCall(PetscFree(merge->buf_ri[0])); 4739 PetscCall(PetscFree(merge->buf_ri)); 4740 PetscCall(PetscFree(merge->buf_rj[0])); 4741 PetscCall(PetscFree(merge->buf_rj)); 4742 PetscCall(PetscFree(merge->coi)); 4743 PetscCall(PetscFree(merge->coj)); 4744 PetscCall(PetscFree(merge->owners_co)); 4745 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4746 PetscCall(PetscFree(merge)); 4747 PetscFunctionReturn(PETSC_SUCCESS); 4748 } 4749 4750 #include <../src/mat/utils/freespace.h> 4751 #include <petscbt.h> 4752 4753 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4754 { 4755 MPI_Comm comm; 4756 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4757 PetscMPIInt size, rank, taga, *len_s; 4758 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4759 PetscMPIInt proc, k; 4760 PetscInt **buf_ri, **buf_rj; 4761 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4762 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4763 MPI_Request *s_waits, *r_waits; 4764 MPI_Status *status; 4765 const MatScalar *aa, *a_a; 4766 MatScalar **abuf_r, *ba_i; 4767 Mat_Merge_SeqsToMPI *merge; 4768 PetscContainer container; 4769 4770 PetscFunctionBegin; 4771 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4772 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4773 4774 PetscCallMPI(MPI_Comm_size(comm, &size)); 4775 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4776 4777 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4778 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4779 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4780 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4781 aa = a_a; 4782 4783 bi = merge->bi; 4784 bj = merge->bj; 4785 buf_ri = merge->buf_ri; 4786 buf_rj = merge->buf_rj; 4787 4788 PetscCall(PetscMalloc1(size, &status)); 4789 owners = merge->rowmap->range; 4790 len_s = merge->len_s; 4791 4792 /* send and recv matrix values */ 4793 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4794 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4795 4796 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4797 for (proc = 0, k = 0; proc < size; proc++) { 4798 if (!len_s[proc]) continue; 4799 i = owners[proc]; 4800 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4801 k++; 4802 } 4803 4804 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4805 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4806 PetscCall(PetscFree(status)); 4807 4808 PetscCall(PetscFree(s_waits)); 4809 PetscCall(PetscFree(r_waits)); 4810 4811 /* insert mat values of mpimat */ 4812 PetscCall(PetscMalloc1(N, &ba_i)); 4813 
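/* ba_i accumulates the values of one merged row at a time; the arrays allocated next are cursors that walk each received i-structure row by row while local and received contributions are summed into ba_i */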
PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4814 4815 for (k = 0; k < merge->nrecv; k++) { 4816 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4817 nrows = *buf_ri_k[k]; 4818 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4819 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4820 } 4821 4822 /* set values of ba */ 4823 m = merge->rowmap->n; 4824 for (i = 0; i < m; i++) { 4825 arow = owners[rank] + i; 4826 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4827 bnzi = bi[i + 1] - bi[i]; 4828 PetscCall(PetscArrayzero(ba_i, bnzi)); 4829 4830 /* add local non-zero vals of this proc's seqmat into ba */ 4831 anzi = ai[arow + 1] - ai[arow]; 4832 aj = a->j + ai[arow]; 4833 aa = a_a + ai[arow]; 4834 nextaj = 0; 4835 for (j = 0; nextaj < anzi; j++) { 4836 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4837 ba_i[j] += aa[nextaj++]; 4838 } 4839 } 4840 4841 /* add received vals into ba */ 4842 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4843 /* i-th row */ 4844 if (i == *nextrow[k]) { 4845 anzi = *(nextai[k] + 1) - *nextai[k]; 4846 aj = buf_rj[k] + *nextai[k]; 4847 aa = abuf_r[k] + *nextai[k]; 4848 nextaj = 0; 4849 for (j = 0; nextaj < anzi; j++) { 4850 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4851 ba_i[j] += aa[nextaj++]; 4852 } 4853 } 4854 nextrow[k]++; 4855 nextai[k]++; 4856 } 4857 } 4858 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4859 } 4860 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4861 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4862 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4863 4864 PetscCall(PetscFree(abuf_r[0])); 4865 PetscCall(PetscFree(abuf_r)); 4866 PetscCall(PetscFree(ba_i)); 4867 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4868 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4869 PetscFunctionReturn(PETSC_SUCCESS); 4870 } 4871 4872 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4873 { 4874 Mat B_mpi; 4875 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4876 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4877 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4878 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4879 PetscInt len, *dnz, *onz, bs, cbs; 4880 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4881 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4882 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4883 MPI_Status *status; 4884 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4885 PetscBT lnkbt; 4886 Mat_Merge_SeqsToMPI *merge; 4887 PetscContainer container; 4888 4889 PetscFunctionBegin; 4890 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4891 4892 /* make sure it is a PETSc comm */ 4893 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4894 PetscCallMPI(MPI_Comm_size(comm, &size)); 4895 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4896 4897 PetscCall(PetscNew(&merge)); 4898 PetscCall(PetscMalloc1(size, &status)); 4899 4900 /* determine row ownership */ 4901 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4902 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4903 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4904 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4905 PetscCall(PetscLayoutSetUp(merge->rowmap)); 
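/* For each process, len_si[proc] will hold the length of the i-structure (row index/offset) message and merge->len_s[proc] the length of the j-structure (column index) message to be sent to it */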
4906 PetscCall(PetscMalloc1(size, &len_si)); 4907 PetscCall(PetscMalloc1(size, &merge->len_s)); 4908 4909 m = merge->rowmap->n; 4910 owners = merge->rowmap->range; 4911 4912 /* determine the number of messages to send, their lengths */ 4913 len_s = merge->len_s; 4914 4915 len = 0; /* length of buf_si[] */ 4916 merge->nsend = 0; 4917 for (PetscMPIInt proc = 0; proc < size; proc++) { 4918 len_si[proc] = 0; 4919 if (proc == rank) { 4920 len_s[proc] = 0; 4921 } else { 4922 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4923 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4924 } 4925 if (len_s[proc]) { 4926 merge->nsend++; 4927 nrows = 0; 4928 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4929 if (ai[i + 1] > ai[i]) nrows++; 4930 } 4931 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4932 len += len_si[proc]; 4933 } 4934 } 4935 4936 /* determine the number and length of messages to receive for ij-structure */ 4937 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4938 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4939 4940 /* post the Irecv of j-structure */ 4941 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4942 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4943 4944 /* post the Isend of j-structure */ 4945 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4946 4947 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4948 if (!len_s[proc]) continue; 4949 i = owners[proc]; 4950 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4951 k++; 4952 } 4953 4954 /* receives and sends of j-structure are complete */ 4955 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4956 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4957 4958 /* send and recv i-structure */ 4959 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4960 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4961 4962 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4963 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4964 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4965 if (!len_s[proc]) continue; 4966 /* form outgoing message for i-structure: 4967 buf_si[0]: nrows to be sent 4968 [1:nrows]: row index (global) 4969 [nrows+1:2*nrows+1]: i-structure index 4970 */ 4971 nrows = len_si[proc] / 2 - 1; 4972 buf_si_i = buf_si + nrows + 1; 4973 buf_si[0] = nrows; 4974 buf_si_i[0] = 0; 4975 nrows = 0; 4976 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4977 anzi = ai[i + 1] - ai[i]; 4978 if (anzi) { 4979 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4980 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4981 nrows++; 4982 } 4983 } 4984 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4985 k++; 4986 buf_si += len_si[proc]; 4987 } 4988 4989 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4990 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4991 4992 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4993 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], 
merge->len_r[i], merge->id_r[i])); 4994 4995 PetscCall(PetscFree(len_si)); 4996 PetscCall(PetscFree(len_ri)); 4997 PetscCall(PetscFree(rj_waits)); 4998 PetscCall(PetscFree2(si_waits, sj_waits)); 4999 PetscCall(PetscFree(ri_waits)); 5000 PetscCall(PetscFree(buf_s)); 5001 PetscCall(PetscFree(status)); 5002 5003 /* compute a local seq matrix in each processor */ 5004 /* allocate bi array and free space for accumulating nonzero column info */ 5005 PetscCall(PetscMalloc1(m + 1, &bi)); 5006 bi[0] = 0; 5007 5008 /* create and initialize a linked list */ 5009 nlnk = N + 1; 5010 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 5011 5012 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5013 len = ai[owners[rank + 1]] - ai[owners[rank]]; 5014 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 5015 5016 current_space = free_space; 5017 5018 /* determine symbolic info for each local row */ 5019 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5020 5021 for (k = 0; k < merge->nrecv; k++) { 5022 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5023 nrows = *buf_ri_k[k]; 5024 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5025 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5026 } 5027 5028 MatPreallocateBegin(comm, m, n, dnz, onz); 5029 len = 0; 5030 for (i = 0; i < m; i++) { 5031 bnzi = 0; 5032 /* add local non-zero cols of this proc's seqmat into lnk */ 5033 arow = owners[rank] + i; 5034 anzi = ai[arow + 1] - ai[arow]; 5035 aj = a->j + ai[arow]; 5036 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5037 bnzi += nlnk; 5038 /* add received col data into lnk */ 5039 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5040 if (i == *nextrow[k]) { /* i-th row */ 5041 anzi = *(nextai[k] + 1) - *nextai[k]; 5042 aj = buf_rj[k] + *nextai[k]; 5043 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5044 bnzi += nlnk; 5045 nextrow[k]++; 5046 nextai[k]++; 5047 } 5048 } 5049 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5050 5051 /* if free space is not available, make more free space */ 5052 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space)); 5053 /* copy data into free space, then initialize lnk */ 5054 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5055 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5056 5057 current_space->array += bnzi; 5058 current_space->local_used += bnzi; 5059 current_space->local_remaining -= bnzi; 5060 5061 bi[i + 1] = bi[i] + bnzi; 5062 } 5063 5064 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5065 5066 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5067 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5068 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5069 5070 /* create symbolic parallel matrix B_mpi */ 5071 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5072 PetscCall(MatCreate(comm, &B_mpi)); 5073 if (n == PETSC_DECIDE) { 5074 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5075 } else { 5076 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5077 } 5078 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5079 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5080 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5081 MatPreallocateEnd(dnz, onz); 5082 PetscCall(MatSetOption(B_mpi,
MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5083 5084 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5085 B_mpi->assembled = PETSC_FALSE; 5086 merge->bi = bi; 5087 merge->bj = bj; 5088 merge->buf_ri = buf_ri; 5089 merge->buf_rj = buf_rj; 5090 merge->coi = NULL; 5091 merge->coj = NULL; 5092 merge->owners_co = NULL; 5093 5094 PetscCall(PetscCommDestroy(&comm)); 5095 5096 /* attach the supporting struct to B_mpi for reuse */ 5097 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5098 PetscCall(PetscContainerSetPointer(container, merge)); 5099 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5100 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5101 PetscCall(PetscContainerDestroy(&container)); 5102 *mpimat = B_mpi; 5103 5104 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5105 PetscFunctionReturn(PETSC_SUCCESS); 5106 } 5107 5108 /*@ 5109 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5110 matrices from each processor 5111 5112 Collective 5113 5114 Input Parameters: 5115 + comm - the communicator the parallel matrix will live on 5116 . seqmat - the input sequential matrix 5117 . m - number of local rows (or `PETSC_DECIDE`) 5118 . n - number of local columns (or `PETSC_DECIDE`) 5119 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5120 5121 Output Parameter: 5122 . mpimat - the parallel matrix generated 5123 5124 Level: advanced 5125 5126 Note: 5127 The dimensions of the sequential matrix in each processor MUST be the same. 5128 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 5129 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5130 5131 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5132 @*/ 5133 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5134 { 5135 PetscMPIInt size; 5136 5137 PetscFunctionBegin; 5138 PetscCallMPI(MPI_Comm_size(comm, &size)); 5139 if (size == 1) { 5140 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5141 if (scall == MAT_INITIAL_MATRIX) { 5142 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5143 } else { 5144 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5145 } 5146 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5147 PetscFunctionReturn(PETSC_SUCCESS); 5148 } 5149 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5150 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5151 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5152 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5153 PetscFunctionReturn(PETSC_SUCCESS); 5154 } 5155 5156 /*@ 5157 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5158 5159 Not Collective 5160 5161 Input Parameter: 5162 . A - the matrix 5163 5164 Output Parameter: 5165 . A_loc - the local sequential matrix generated 5166 5167 Level: developer 5168 5169 Notes: 5170 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5171 with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and 5172 `n` is the global column count obtained with `MatGetSize()`. 5173 5174 In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
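   For example, a minimal usage sketch (here `A` is assumed to be an already assembled `MATAIJ` matrix):
.vb
   Mat A_loc;

   PetscCall(MatAIJGetLocalMat(A, &A_loc));
   ... inspect A_loc, e.g., with MatView() ...
   PetscCall(MatDestroy(&A_loc));
.ve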
5175 5176 For parallel matrices, this creates an entirely new matrix. If the matrix is sequential, it merely increases the reference count. 5177 5178 Destroy the matrix with `MatDestroy()` 5179 5180 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5181 @*/ 5182 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5183 { 5184 PetscBool mpi; 5185 5186 PetscFunctionBegin; 5187 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5188 if (mpi) { 5189 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5190 } else { 5191 *A_loc = A; 5192 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5193 } 5194 PetscFunctionReturn(PETSC_SUCCESS); 5195 } 5196 5197 /*@ 5198 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5199 5200 Not Collective 5201 5202 Input Parameters: 5203 + A - the matrix 5204 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5205 5206 Output Parameter: 5207 . A_loc - the local sequential matrix generated 5208 5209 Level: developer 5210 5211 Notes: 5212 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5213 matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with 5214 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5215 5216 In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix. 5217 5218 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5219 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix, 5220 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5221 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
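   Example Usage:
   A sketch of the create-then-reuse pattern (here `A` is assumed to be an assembled `MATMPIAIJ` matrix whose numerical values, but not its nonzero pattern, change between the two calls):
.vb
   Mat A_loc;

   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
   ... use A_loc, then change the numerical values of A ...
   PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));
   ... use the refreshed A_loc ...
   PetscCall(MatDestroy(&A_loc));
.ve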
5222 5223 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5224 @*/ 5225 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5226 { 5227 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5228 Mat_SeqAIJ *mat, *a, *b; 5229 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5230 const PetscScalar *aa, *ba, *aav, *bav; 5231 PetscScalar *ca, *cam; 5232 PetscMPIInt size; 5233 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5234 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5235 PetscBool match; 5236 5237 PetscFunctionBegin; 5238 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5239 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5240 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5241 if (size == 1) { 5242 if (scall == MAT_INITIAL_MATRIX) { 5243 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5244 *A_loc = mpimat->A; 5245 } else if (scall == MAT_REUSE_MATRIX) { 5246 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5247 } 5248 PetscFunctionReturn(PETSC_SUCCESS); 5249 } 5250 5251 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5252 a = (Mat_SeqAIJ *)mpimat->A->data; 5253 b = (Mat_SeqAIJ *)mpimat->B->data; 5254 ai = a->i; 5255 aj = a->j; 5256 bi = b->i; 5257 bj = b->j; 5258 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5259 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5260 aa = aav; 5261 ba = bav; 5262 if (scall == MAT_INITIAL_MATRIX) { 5263 PetscCall(PetscMalloc1(1 + am, &ci)); 5264 ci[0] = 0; 5265 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5266 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5267 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5268 k = 0; 5269 for (i = 0; i < am; i++) { 5270 ncols_o = bi[i + 1] - bi[i]; 5271 ncols_d = ai[i + 1] - ai[i]; 5272 /* off-diagonal portion of A */ 5273 for (jo = 0; jo < ncols_o; jo++) { 5274 col = cmap[*bj]; 5275 if (col >= cstart) break; 5276 cj[k] = col; 5277 bj++; 5278 ca[k++] = *ba++; 5279 } 5280 /* diagonal portion of A */ 5281 for (j = 0; j < ncols_d; j++) { 5282 cj[k] = cstart + *aj++; 5283 ca[k++] = *aa++; 5284 } 5285 /* off-diagonal portion of A */ 5286 for (j = jo; j < ncols_o; j++) { 5287 cj[k] = cmap[*bj++]; 5288 ca[k++] = *ba++; 5289 } 5290 } 5291 /* put together the new matrix */ 5292 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5293 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5294 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5295 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5296 mat->free_a = PETSC_TRUE; 5297 mat->free_ij = PETSC_TRUE; 5298 mat->nonew = 0; 5299 } else if (scall == MAT_REUSE_MATRIX) { 5300 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5301 ci = mat->i; 5302 cj = mat->j; 5303 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5304 for (i = 0; i < am; i++) { 5305 /* off-diagonal portion of A */ 5306 ncols_o = bi[i + 1] - bi[i]; 5307 for (jo = 0; jo < ncols_o; jo++) { 5308 col = cmap[*bj]; 5309 if (col >= cstart) break; 5310 *cam++ = *ba++; 5311 bj++; 5312 } 5313 /* diagonal portion of A */ 5314 ncols_d = ai[i + 1] - ai[i]; 5315 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5316 /* off-diagonal portion of A */ 5317 for (j = jo; j < ncols_o; j++) { 5318 *cam++ = *ba++; 5319 bj++; 5320 } 5321 } 5322 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5323 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5324 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5325 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5326 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5327 PetscFunctionReturn(PETSC_SUCCESS); 5328 } 5329 5330 /*@ 5331 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5332 `mlocal` rows and `n` columns, where `n` is the sum of the number of columns of the diagonal and off-diagonal parts 5333 5334 Not Collective 5335 5336 Input Parameters: 5337 + A - the matrix 5338 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5339 5340 Output Parameters: 5341 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5342 - A_loc - the local sequential matrix generated 5343 5344 Level: developer 5345 5346 Note: 5347 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal 5348 part, then those associated with the off-diagonal part (in its local ordering) 5349 5350 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5351 @*/ 5352 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5353 { 5354 Mat Ao, Ad; 5355 const PetscInt *cmap; 5356 PetscMPIInt size; 5357 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5358 5359 PetscFunctionBegin; 5360 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5361 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5362 if (size == 1) { 5363 if (scall == MAT_INITIAL_MATRIX) { 5364 PetscCall(PetscObjectReference((PetscObject)Ad)); 5365 *A_loc = Ad; 5366 } else if (scall == MAT_REUSE_MATRIX) { 5367 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5368 } 5369 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5370 PetscFunctionReturn(PETSC_SUCCESS); 5371 } 5372 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5373 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5374 if (f) { 5375 PetscCall((*f)(A, scall, glob, A_loc)); 5376 } else { 5377 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5378 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5379 Mat_SeqAIJ *c; 5380 PetscInt *ai = a->i, *aj = a->j; 5381 PetscInt *bi = b->i, *bj = b->j; 5382 PetscInt *ci, *cj; 5383 const PetscScalar *aa, *ba; 5384 PetscScalar *ca; 5385 PetscInt i, j, am, dn, on; 5386 5387
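/* Default path: concatenate Ad and Ao row by row; columns of the diagonal block keep their local indices in [0, dn), and off-diagonal columns are appended with their local indices shifted by dn */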
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5388 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5389 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5390 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5391 if (scall == MAT_INITIAL_MATRIX) { 5392 PetscInt k; 5393 PetscCall(PetscMalloc1(1 + am, &ci)); 5394 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5395 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5396 ci[0] = 0; 5397 for (i = 0, k = 0; i < am; i++) { 5398 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5399 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5400 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5401 /* diagonal portion of A */ 5402 for (j = 0; j < ncols_d; j++, k++) { 5403 cj[k] = *aj++; 5404 ca[k] = *aa++; 5405 } 5406 /* off-diagonal portion of A */ 5407 for (j = 0; j < ncols_o; j++, k++) { 5408 cj[k] = dn + *bj++; 5409 ca[k] = *ba++; 5410 } 5411 } 5412 /* put together the new matrix */ 5413 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5414 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5415 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5416 c = (Mat_SeqAIJ *)(*A_loc)->data; 5417 c->free_a = PETSC_TRUE; 5418 c->free_ij = PETSC_TRUE; 5419 c->nonew = 0; 5420 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5421 } else if (scall == MAT_REUSE_MATRIX) { 5422 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5423 for (i = 0; i < am; i++) { 5424 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5425 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5426 /* diagonal portion of A */ 5427 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5428 /* off-diagonal portion of A */ 5429 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5430 } 5431 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5432 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5433 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5434 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba)); 5435 if (glob) { 5436 PetscInt cst, *gidx; 5437 5438 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5439 PetscCall(PetscMalloc1(dn + on, &gidx)); 5440 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5441 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5442 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5443 } 5444 } 5445 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5446 PetscFunctionReturn(PETSC_SUCCESS); 5447 } 5448 5449 /*@C 5450 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5451 5452 Not Collective 5453 5454 Input Parameters: 5455 + A - the matrix 5456 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5457 . row - index set of rows to extract (or `NULL`) 5458 - col - index set of columns to extract (or `NULL`) 5459 5460 Output Parameter: 5461 .
A_loc - the local sequential matrix generated 5462 5463 Level: developer 5464 5465 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5466 @*/ 5467 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5468 { 5469 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5470 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5471 IS isrowa, iscola; 5472 Mat *aloc; 5473 PetscBool match; 5474 5475 PetscFunctionBegin; 5476 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5477 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5478 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5479 if (!row) { 5480 start = A->rmap->rstart; 5481 end = A->rmap->rend; 5482 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5483 } else { 5484 isrowa = *row; 5485 } 5486 if (!col) { 5487 start = A->cmap->rstart; 5488 cmap = a->garray; 5489 nzA = a->A->cmap->n; 5490 nzB = a->B->cmap->n; 5491 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5492 ncols = 0; 5493 for (i = 0; i < nzB; i++) { 5494 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5495 else break; 5496 } 5497 imark = i; 5498 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5499 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5500 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5501 } else { 5502 iscola = *col; 5503 } 5504 if (scall != MAT_INITIAL_MATRIX) { 5505 PetscCall(PetscMalloc1(1, &aloc)); 5506 aloc[0] = *A_loc; 5507 } 5508 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5509 if (!col) { /* attach global id of condensed columns */ 5510 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5511 } 5512 *A_loc = aloc[0]; 5513 PetscCall(PetscFree(aloc)); 5514 if (!row) PetscCall(ISDestroy(&isrowa)); 5515 if (!col) PetscCall(ISDestroy(&iscola)); 5516 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5517 PetscFunctionReturn(PETSC_SUCCESS); 5518 } 5519 5520 /* 5521 * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once the row is matched. 5522 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based 5523 on a global size.
5524 * */ 5525 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5526 { 5527 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5528 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5529 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5530 PetscMPIInt owner; 5531 PetscSFNode *iremote, *oiremote; 5532 const PetscInt *lrowindices; 5533 PetscSF sf, osf; 5534 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5535 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5536 MPI_Comm comm; 5537 ISLocalToGlobalMapping mapping; 5538 const PetscScalar *pd_a, *po_a; 5539 5540 PetscFunctionBegin; 5541 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5542 /* plocalsize is the number of roots 5543 * nrows is the number of leaves 5544 * */ 5545 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5546 PetscCall(ISGetLocalSize(rows, &nrows)); 5547 PetscCall(PetscCalloc1(nrows, &iremote)); 5548 PetscCall(ISGetIndices(rows, &lrowindices)); 5549 for (i = 0; i < nrows; i++) { 5550 /* Find a remote index and an owner for a row 5551 * The row could be local or remote 5552 * */ 5553 owner = 0; 5554 lidx = 0; 5555 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5556 iremote[i].index = lidx; 5557 iremote[i].rank = owner; 5558 } 5559 /* Create SF to communicate how many nonzero columns for each row */ 5560 PetscCall(PetscSFCreate(comm, &sf)); 5561 /* SF will figure out the number of nonzero columns for each row, and their 5562 * offsets 5563 * */ 5564 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5565 PetscCall(PetscSFSetFromOptions(sf)); 5566 PetscCall(PetscSFSetUp(sf)); 5567 5568 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5569 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5570 PetscCall(PetscCalloc1(nrows, &pnnz)); 5571 roffsets[0] = 0; 5572 roffsets[1] = 0; 5573 for (i = 0; i < plocalsize; i++) { 5574 /* diagonal */ 5575 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5576 /* off-diagonal */ 5577 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5578 /* compute offsets so that we know the relative location of each row */ 5579 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5580 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5581 } 5582 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5583 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5584 /* 'r' means root, and 'l' means leaf */ 5585 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5586 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5587 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5588 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5589 PetscCall(PetscSFDestroy(&sf)); 5590 PetscCall(PetscFree(roffsets)); 5591 PetscCall(PetscFree(nrcols)); 5592 dntotalcols = 0; 5593 ontotalcols = 0; 5594 ncol = 0; 5595 for (i = 0; i < nrows; i++) { 5596 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5597 ncol = PetscMax(pnnz[i], ncol); 5598 /* diagonal */ 5599 dntotalcols += nlcols[i * 2 + 0]; 5600 /* off-diagonal */ 5601 ontotalcols += nlcols[i * 2 + 1]; 5602 } 5603 /* We do not need to figure out the right number of columns 5604 * since all the calculations will be done by going through the raw data 5605 * */ 5606 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5607 PetscCall(MatSetUp(*P_oth)); 5608
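/* Next, build two star forests (one for the diagonal block, one for the off-diagonal block) whose roots are P's CSR entries and whose leaves are the entries of P_oth, so values and column indices can be scattered directly into P_oth's arrays */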
PetscCall(PetscFree(pnnz)); 5609 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5610 /* diagonal */ 5611 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5612 /* off-diagonal */ 5613 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5614 /* diagonal */ 5615 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5616 /* off-diagonal */ 5617 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5618 dntotalcols = 0; 5619 ontotalcols = 0; 5620 ntotalcols = 0; 5621 for (i = 0; i < nrows; i++) { 5622 owner = 0; 5623 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5624 /* Set iremote for diag matrix */ 5625 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5626 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5627 iremote[dntotalcols].rank = owner; 5628 /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */ 5629 ilocal[dntotalcols++] = ntotalcols++; 5630 } 5631 /* off-diagonal */ 5632 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5633 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5634 oiremote[ontotalcols].rank = owner; 5635 oilocal[ontotalcols++] = ntotalcols++; 5636 } 5637 } 5638 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5639 PetscCall(PetscFree(loffsets)); 5640 PetscCall(PetscFree(nlcols)); 5641 PetscCall(PetscSFCreate(comm, &sf)); 5642 /* P serves as roots and P_oth as leaves 5643 * Diag matrix 5644 * */ 5645 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5646 PetscCall(PetscSFSetFromOptions(sf)); 5647 PetscCall(PetscSFSetUp(sf)); 5648 5649 PetscCall(PetscSFCreate(comm, &osf)); 5650 /* off-diagonal */ 5651 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5652 PetscCall(PetscSFSetFromOptions(osf)); 5653 PetscCall(PetscSFSetUp(osf)); 5654 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5655 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5656 /* operate on the matrix internal data to save memory */ 5657 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5658 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5659 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5660 /* Convert to global indices for diag matrix */ 5661 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5662 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5663 /* We want P_oth to store global indices */ 5664 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5665 /* Use memory scalable approach */ 5666 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5667 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5668 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5669 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5670 /* Convert back to local indices */ 5671 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5672 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5673 nout = 0; 5674 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5675 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5676 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5677 /* Exchange values */ 5678 PetscCall(PetscSFBcastEnd(sf,
MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5679 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5680 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5681 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5682 /* Stop PETSc from shrinking memory */ 5683 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5684 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5685 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5686 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5687 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5688 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5689 PetscCall(PetscSFDestroy(&sf)); 5690 PetscCall(PetscSFDestroy(&osf)); 5691 PetscFunctionReturn(PETSC_SUCCESS); 5692 } 5693 5694 /* 5695 * Creates a SeqAIJ matrix by taking the rows of B that equal the nonzero columns of local A 5696 * This supports MPIAIJ and MAIJ 5697 * */ 5698 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5699 { 5700 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5701 Mat_SeqAIJ *p_oth; 5702 IS rows, map; 5703 PetscHMapI hamp; 5704 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5705 MPI_Comm comm; 5706 PetscSF sf, osf; 5707 PetscBool has; 5708 5709 PetscFunctionBegin; 5710 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5711 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5712 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5713 * and then create a submatrix (that often is an overlapping matrix) 5714 * */ 5715 if (reuse == MAT_INITIAL_MATRIX) { 5716 /* Use a hash table to figure out unique keys */ 5717 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5718 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5719 count = 0; 5720 /* Assume that a->garray is sorted, otherwise the following does not make sense */ 5721 for (i = 0; i < a->B->cmap->n; i++) { 5722 key = a->garray[i] / dof; 5723 PetscCall(PetscHMapIHas(hamp, key, &has)); 5724 if (!has) { 5725 mapping[i] = count; 5726 PetscCall(PetscHMapISet(hamp, key, count++)); 5727 } else { 5728 /* Current 'i' has the same value as in the previous step */ 5729 mapping[i] = count - 1; 5730 } 5731 } 5732 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5733 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5734 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5735 PetscCall(PetscCalloc1(htsize, &rowindices)); 5736 off = 0; 5737 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5738 PetscCall(PetscHMapIDestroy(&hamp)); 5739 PetscCall(PetscSortInt(htsize, rowindices)); 5740 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5741 /* In case the matrix was already created and the user wants to recreate it */ 5742 PetscCall(MatDestroy(P_oth)); 5743 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5744 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5745 PetscCall(ISDestroy(&map)); 5746 PetscCall(ISDestroy(&rows)); 5747 } else if (reuse == MAT_REUSE_MATRIX) { 5748 /* If the matrix was already created, we simply update the values using the SF objects 5749 * that were attached to the matrix earlier.
5750 */ 5751 const PetscScalar *pd_a, *po_a; 5752 5753 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5754 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5755 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5756 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5757 /* Update values in place */ 5758 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5759 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5760 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5761 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5762 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5763 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5764 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5765 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5766 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5767 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5768 PetscFunctionReturn(PETSC_SUCCESS); 5769 } 5770 5771 /*@C 5772 MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` that correspond to nonzero columns of local `A` 5773 5774 Collective 5775 5776 Input Parameters: 5777 + A - the first matrix in `MATMPIAIJ` format 5778 . B - the second matrix in `MATMPIAIJ` format 5779 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5780 5781 Output Parameters: 5782 + rowb - On input, index sets of rows of B to extract (or `NULL`); modified on output 5783 . colb - On input, index sets of columns of B to extract (or `NULL`); modified on output 5784 - B_seq - the sequential matrix generated 5785 5786 Level: developer 5787 5788 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5789 @*/ 5790 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5791 { 5792 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5793 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5794 IS isrowb, iscolb; 5795 Mat *bseq = NULL; 5796 5797 PetscFunctionBegin; 5798 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5799 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5800 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5801 5802 if (scall == MAT_INITIAL_MATRIX) { 5803 start = A->cmap->rstart; 5804 cmap = a->garray; 5805 nzA = a->A->cmap->n; 5806 nzB = a->B->cmap->n; 5807 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5808 ncols = 0; 5809 for (i = 0; i < nzB; i++) { /* row < local row index */ 5810 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5811 else break; 5812 } 5813 imark = i; 5814 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5815 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5816 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5817 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5818 } else { 5819 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5820 isrowb = *rowb; 5821 iscolb = *colb; 5822 PetscCall(PetscMalloc1(1, &bseq)); 5823 bseq[0] = *B_seq; 5824 } 5825 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5826 *B_seq = bseq[0]; 5827 PetscCall(PetscFree(bseq));
5828 if (!rowb) { 5829 PetscCall(ISDestroy(&isrowb)); 5830 } else { 5831 *rowb = isrowb; 5832 } 5833 if (!colb) { 5834 PetscCall(ISDestroy(&iscolb)); 5835 } else { 5836 *colb = iscolb; 5837 } 5838 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5839 PetscFunctionReturn(PETSC_SUCCESS); 5840 } 5841 5842 /* 5843 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to nonzero columns 5844 of the OFF-DIAGONAL portion of local A 5845 5846 Collective 5847 5848 Input Parameters: 5849 + A,B - the matrices in `MATMPIAIJ` format 5850 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5851 5852 Output Parameters: 5853 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5854 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5855 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5856 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5857 5858 Developer Note: 5859 This directly accesses information inside the VecScatter associated with the matrix-vector product 5860 for this matrix. This is not desirable. 5861 5862 Level: developer 5863 5864 */ 5865 5866 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5867 { 5868 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5869 VecScatter ctx; 5870 MPI_Comm comm; 5871 const PetscMPIInt *rprocs, *sprocs; 5872 PetscMPIInt nrecvs, nsends; 5873 const PetscInt *srow, *rstarts, *sstarts; 5874 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5875 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5876 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5877 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5878 PetscMPIInt size, tag, rank, nreqs; 5879 5880 PetscFunctionBegin; 5881 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5882 PetscCallMPI(MPI_Comm_size(comm, &size)); 5883 5884 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5885 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5886 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5887 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5888 5889 if (size == 1) { 5890 startsj_s = NULL; 5891 bufa_ptr = NULL; 5892 *B_oth = NULL; 5893 PetscFunctionReturn(PETSC_SUCCESS); 5894 } 5895 5896 ctx = a->Mvctx; 5897 tag = ((PetscObject)ctx)->tag; 5898 5899 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5900 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5901 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5902 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5903 PetscCall(PetscMalloc1(nreqs, &reqs)); 5904 rwaits = reqs; 5905 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5906 5907 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5908 if (scall == MAT_INITIAL_MATRIX) { 5909 /* i-array */ 5910 /* post receives */ 5911 if (nrecvs) PetscCall(PetscMalloc1(rbs *
(rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5912 for (i = 0; i < nrecvs; i++) { 5913 rowlen = rvalues + rstarts[i] * rbs; 5914 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5915 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5916 } 5917 5918 /* pack the outgoing message */ 5919 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5920 5921 sstartsj[0] = 0; 5922 rstartsj[0] = 0; 5923 len = 0; /* total length of j or a array to be sent */ 5924 if (nsends) { 5925 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5926 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5927 } 5928 for (i = 0; i < nsends; i++) { 5929 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5930 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5931 for (j = 0; j < nrows; j++) { 5932 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5933 for (l = 0; l < sbs; l++) { 5934 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5935 5936 rowlen[j * sbs + l] = ncols; 5937 5938 len += ncols; 5939 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5940 } 5941 k++; 5942 } 5943 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5944 5945 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5946 } 5947 /* recvs and sends of i-array are completed */ 5948 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5949 PetscCall(PetscFree(svalues)); 5950 5951 /* allocate buffers for sending j and a arrays */ 5952 PetscCall(PetscMalloc1(len + 1, &bufj)); 5953 PetscCall(PetscMalloc1(len + 1, &bufa)); 5954 5955 /* create i-array of B_oth */ 5956 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5957 5958 b_othi[0] = 0; 5959 len = 0; /* total length of j or a array to be received */ 5960 k = 0; 5961 for (i = 0; i < nrecvs; i++) { 5962 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5963 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5964 for (j = 0; j < nrows; j++) { 5965 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5966 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5967 k++; 5968 } 5969 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5970 } 5971 PetscCall(PetscFree(rvalues)); 5972 5973 /* allocate space for j and a arrays of B_oth */ 5974 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5975 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5976 5977 /* j-array */ 5978 /* post receives of j-array */ 5979 for (i = 0; i < nrecvs; i++) { 5980 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5981 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5982 } 5983 5984 /* pack the outgoing message j-array */ 5985 if (nsends) k = sstarts[0]; 5986 for (i = 0; i < nsends; i++) { 5987 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5988 bufJ = bufj + sstartsj[i]; 5989 for (j = 0; j < nrows; j++) { 5990 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5991 for (ll = 0; ll < sbs; ll++) { 5992 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5993 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5994 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5995 } 5996 } 5997 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - 
sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5998 } 5999 6000 /* recvs and sends of j-array are completed */ 6001 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6002 } else if (scall == MAT_REUSE_MATRIX) { 6003 sstartsj = *startsj_s; 6004 rstartsj = *startsj_r; 6005 bufa = *bufa_ptr; 6006 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 6007 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse, must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 6008 6009 /* a-array */ 6010 /* post receives of a-array */ 6011 for (i = 0; i < nrecvs; i++) { 6012 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 6013 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 6014 } 6015 6016 /* pack the outgoing message a-array */ 6017 if (nsends) k = sstarts[0]; 6018 for (i = 0; i < nsends; i++) { 6019 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6020 bufA = bufa + sstartsj[i]; 6021 for (j = 0; j < nrows; j++) { 6022 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6023 for (ll = 0; ll < sbs; ll++) { 6024 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6025 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6026 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6027 } 6028 } 6029 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6030 } 6031 /* recvs and sends of a-array are completed */ 6032 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6033 PetscCall(PetscFree(reqs)); 6034 6035 if (scall == MAT_INITIAL_MATRIX) { 6036 Mat_SeqAIJ *b_oth; 6037 6038 /* put together the new matrix */ 6039 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6040 6041 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6042 /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 6043 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6044 b_oth->free_a = PETSC_TRUE; 6045 b_oth->free_ij = PETSC_TRUE; 6046 b_oth->nonew = 0; 6047 6048 PetscCall(PetscFree(bufj)); 6049 if (!startsj_s || !bufa_ptr) { 6050 PetscCall(PetscFree2(sstartsj, rstartsj)); 6051 PetscCall(PetscFree(bufa)); 6052 } else { 6053 *startsj_s = sstartsj; 6054 *startsj_r = rstartsj; 6055 *bufa_ptr = bufa; 6056 } 6057 } else if (scall == MAT_REUSE_MATRIX) { 6058 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6059 } 6060 6061 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6062 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6063 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6064 PetscFunctionReturn(PETSC_SUCCESS); 6065 } 6066 6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6068 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6070 #if defined(PETSC_HAVE_MKL_SPARSE) 6071 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6072 #endif 6073 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6074 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6075 #if defined(PETSC_HAVE_ELEMENTAL) 6076 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6077 #endif 6078 #if defined(PETSC_HAVE_SCALAPACK) 6079 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6080 #endif 6081 #if defined(PETSC_HAVE_HYPRE) 6082 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6083 #endif 6084 #if defined(PETSC_HAVE_CUDA) 6085 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6086 #endif 6087 #if defined(PETSC_HAVE_HIP) 6088 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6089 #endif 6090 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6091 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6092 #endif 6093 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6094 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6095 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6096 6097 /* 6098 Computes (B'*A')' since computing B*A directly is untenable 6099 6100 n p p 6101 [ ] [ ] [ ] 6102 m [ A ] * n [ B ] = m [ C ] 6103 [ ] [ ] [ ] 6104 6105 */ 6106 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6107 { 6108 Mat At, Bt, Ct; 6109 6110 PetscFunctionBegin; 6111 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6112 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6113 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6114 PetscCall(MatDestroy(&At)); 6115 PetscCall(MatDestroy(&Bt)); 6116 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6117 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6118 PetscCall(MatDestroy(&Ct)); 6119 PetscFunctionReturn(PETSC_SUCCESS); 6120 } 6121 6122 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6123 { 6124 PetscBool cisdense; 6125 6126 PetscFunctionBegin; 6127 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF,
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6128 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6129 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6130 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6131 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6132 PetscCall(MatSetUp(C)); 6133 6134 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6135 PetscFunctionReturn(PETSC_SUCCESS); 6136 } 6137 6138 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6139 { 6140 Mat_Product *product = C->product; 6141 Mat A = product->A, B = product->B; 6142 6143 PetscFunctionBegin; 6144 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6145 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6146 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6147 C->ops->productsymbolic = MatProductSymbolic_AB; 6148 PetscFunctionReturn(PETSC_SUCCESS); 6149 } 6150 6151 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6152 { 6153 Mat_Product *product = C->product; 6154 6155 PetscFunctionBegin; 6156 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6157 PetscFunctionReturn(PETSC_SUCCESS); 6158 } 6159 6160 /* 6161 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6162 6163 Input Parameters: 6164 6165 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6166 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6167 6168 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6169 6170 For Set1, j1[] contains column indices of the nonzeros. 6171 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6172 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6173 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6174 6175 Similarly for Set2. 6176 6177 This routine merges the two sets of nonzeros row by row and removes repeats. 6178 6179 Output Parameters: (memory is allocated by the caller) 6180 6181 i[],j[]: the CSR of the merged matrix, which has m rows. 6182 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6183 imap2[]: similar to imap1[], but for Set2. 6184 Note we order nonzeros row-by-row and from left to right.
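As a small made-up example with m = 1: given j1[] = {1,1,4}, rowBegin1 = {0}, rowEnd1 = {3}, jmap1 = {0,2,3} (column 1 repeats twice, column 4 once) and j2[] = {2,4}, rowBegin2 = {0}, rowEnd2 = {2}, jmap2 = {0,1,2}, the merged CSR is i[] = {0,3}, j[] = {1,2,4}, with imap1[] = {0,2} and imap2[] = {1,2}.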
6185 */ 6186 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6187 { 6188 PetscInt r, m; /* Row index of mat */ 6189 PetscCount t, t1, t2, b1, e1, b2, e2; 6190 6191 PetscFunctionBegin; 6192 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6193 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */ 6194 i[0] = 0; 6195 for (r = 0; r < m; r++) { /* Do row by row merging */ 6196 b1 = rowBegin1[r]; 6197 e1 = rowEnd1[r]; 6198 b2 = rowBegin2[r]; 6199 e2 = rowEnd2[r]; 6200 while (b1 < e1 && b2 < e2) { 6201 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6202 j[t] = j1[b1]; 6203 imap1[t1] = t; 6204 imap2[t2] = t; 6205 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero in Set1 */ 6206 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero in Set2 */ 6207 t1++; 6208 t2++; 6209 t++; 6210 } else if (j1[b1] < j2[b2]) { 6211 j[t] = j1[b1]; 6212 imap1[t1] = t; 6213 b1 += jmap1[t1 + 1] - jmap1[t1]; 6214 t1++; 6215 t++; 6216 } else { 6217 j[t] = j2[b2]; 6218 imap2[t2] = t; 6219 b2 += jmap2[t2 + 1] - jmap2[t2]; 6220 t2++; 6221 t++; 6222 } 6223 } 6224 /* Merge the remaining in either j1[] or j2[] */ 6225 while (b1 < e1) { 6226 j[t] = j1[b1]; 6227 imap1[t1] = t; 6228 b1 += jmap1[t1 + 1] - jmap1[t1]; 6229 t1++; 6230 t++; 6231 } 6232 while (b2 < e2) { 6233 j[t] = j2[b2]; 6234 imap2[t2] = t; 6235 b2 += jmap2[t2 + 1] - jmap2[t2]; 6236 t2++; 6237 t++; 6238 } 6239 PetscCall(PetscIntCast(t, i + r + 1)); 6240 } 6241 PetscFunctionReturn(PETSC_SUCCESS); 6242 } 6243 6244 /* 6245 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6246 6247 Input Parameters: 6248 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6249 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6250 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6251 6252 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6253 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6254 6255 Output Parameters: 6256 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6257 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6258 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6259 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6260 6261 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6262 Atot: number of entries belonging to the diagonal block. 6263 Annz: number of unique nonzeros belonging to the diagonal block. 6264 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6265 repeats (i.e., same 'i,j' pair). 6266 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t] 6267 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6268 6269 6270 6271 6272 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6273 6274 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6275 */ 6276 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6277 { 6278 PetscInt cstart, cend, rstart, rend, row, col; 6279 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6280 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6281 PetscCount k, m, p, q, r, s, mid; 6282 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6283 6284 PetscFunctionBegin; 6285 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6286 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6287 m = rend - rstart; 6288 6289 /* Skip negative rows */ 6290 for (k = 0; k < n; k++) 6291 if (i[k] >= 0) break; 6292 6293 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6294 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6295 */ 6296 while (k < n) { 6297 row = i[k]; 6298 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6299 for (s = k; s < n; s++) 6300 if (i[s] != row) break; 6301 6302 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6303 for (p = k; p < s; p++) { 6304 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6305 else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6306 } 6307 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6308 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6309 rowBegin[row - rstart] = k; 6310 rowMid[row - rstart] = mid; 6311 rowEnd[row - rstart] = s; 6312 6313 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6314 Atot += mid - k; 6315 Btot += s - mid; 6316 6317 /* Count unique nonzeros of this diag row */ 6318 for (p = k; p < mid;) { 6319 col = j[p]; 6320 do { 6321 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6322 p++; 6323 } while (p < mid && j[p] == col); 6324 Annz++; 6325 } 6326 6327 /* Count unique nonzeros of this offdiag row */ 6328 for (p = mid; p < s;) { 6329 col = j[p]; 6330 do { 6331 p++; 6332 } while (p < s && j[p] == col); 6333 Bnnz++; 6334 } 6335 k = s; 6336 } 6337 6338 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6339 PetscCall(PetscMalloc1(Atot, &Aperm)); 6340 PetscCall(PetscMalloc1(Btot, &Bperm)); 6341 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6342 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6343 6344 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6345 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6346 for (r = 0; r < m; r++) { 6347 k = rowBegin[r]; 6348 mid
= rowMid[r]; 6349 s = rowEnd[r]; 6350 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6351 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6352 Atot += mid - k; 6353 Btot += s - mid; 6354 6355 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6356 for (p = k; p < mid;) { 6357 col = j[p]; 6358 q = p; 6359 do { 6360 p++; 6361 } while (p < mid && j[p] == col); 6362 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6363 Annz++; 6364 } 6365 6366 for (p = mid; p < s;) { 6367 col = j[p]; 6368 q = p; 6369 do { 6370 p++; 6371 } while (p < s && j[p] == col); 6372 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6373 Bnnz++; 6374 } 6375 } 6376 /* Output */ 6377 *Aperm_ = Aperm; 6378 *Annz_ = Annz; 6379 *Atot_ = Atot; 6380 *Ajmap_ = Ajmap; 6381 *Bperm_ = Bperm; 6382 *Bnnz_ = Bnnz; 6383 *Btot_ = Btot; 6384 *Bjmap_ = Bjmap; 6385 PetscFunctionReturn(PETSC_SUCCESS); 6386 } 6387 6388 /* 6389 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6390 6391 Input Parameters: 6392 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6393 nnz: number of unique nonzeros in the merged matrix 6394 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6395 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6396 6397 Output Parameter: (memory is allocated by the caller) 6398 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6399 6400 Example: 6401 nnz1 = 4 6402 nnz = 6 6403 imap = [1,3,4,5] 6404 jmap = [0,3,5,6,7] 6405 then, 6406 jmap_new = [0,0,3,3,5,6,7] 6407 */ 6408 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6409 { 6410 PetscCount k, p; 6411 6412 PetscFunctionBegin; 6413 jmap_new[0] = 0; 6414 p = nnz; /* p loops over jmap_new[] backwards */ 6415 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6416 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6417 } 6418 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6419 PetscFunctionReturn(PETSC_SUCCESS); 6420 } 6421 6422 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6423 { 6424 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6425 6426 PetscFunctionBegin; 6427 PetscCall(PetscSFDestroy(&coo->sf)); 6428 PetscCall(PetscFree(coo->Aperm1)); 6429 PetscCall(PetscFree(coo->Bperm1)); 6430 PetscCall(PetscFree(coo->Ajmap1)); 6431 PetscCall(PetscFree(coo->Bjmap1)); 6432 PetscCall(PetscFree(coo->Aimap2)); 6433 PetscCall(PetscFree(coo->Bimap2)); 6434 PetscCall(PetscFree(coo->Aperm2)); 6435 PetscCall(PetscFree(coo->Bperm2)); 6436 PetscCall(PetscFree(coo->Ajmap2)); 6437 PetscCall(PetscFree(coo->Bjmap2)); 6438 PetscCall(PetscFree(coo->Cperm1)); 6439 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6440 PetscCall(PetscFree(coo)); 6441 PetscFunctionReturn(PETSC_SUCCESS); 6442 } 6443 6444 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6445 { 6446 MPI_Comm comm; 6447 PetscMPIInt rank, size; 6448 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6449 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6450 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6451 PetscContainer container; 6452 MatCOOStruct_MPIAIJ 
*coo; 6453 6454 PetscFunctionBegin; 6455 PetscCall(PetscFree(mpiaij->garray)); 6456 PetscCall(VecDestroy(&mpiaij->lvec)); 6457 #if defined(PETSC_USE_CTABLE) 6458 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6459 #else 6460 PetscCall(PetscFree(mpiaij->colmap)); 6461 #endif 6462 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6463 mat->assembled = PETSC_FALSE; 6464 mat->was_assembled = PETSC_FALSE; 6465 6466 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6467 PetscCallMPI(MPI_Comm_size(comm, &size)); 6468 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6469 PetscCall(PetscLayoutSetUp(mat->rmap)); 6470 PetscCall(PetscLayoutSetUp(mat->cmap)); 6471 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6472 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6473 PetscCall(MatGetLocalSize(mat, &m, &n)); 6474 PetscCall(MatGetSize(mat, &M, &N)); 6475 6476 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6477 /* entries come first, then local rows, then remote rows. */ 6478 PetscCount n1 = coo_n, *perm1; 6479 PetscInt *i1 = coo_i, *j1 = coo_j; 6480 6481 PetscCall(PetscMalloc1(n1, &perm1)); 6482 for (k = 0; k < n1; k++) perm1[k] = k; 6483 6484 /* Manipulate indices so that entries with negative row or col indices will have smallest 6485 row indices, local entries will have greater but negative row indices, and remote entries 6486 will have positive row indices. 6487 */ 6488 for (k = 0; k < n1; k++) { 6489 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, the minimal value, to move them ahead */ 6490 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6491 else { 6492 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but entries are being inserted into remote rows"); 6493 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6494 } 6495 } 6496 6497 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6498 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6499 6500 /* Advance k to the first entry we need to take care of */ 6501 for (k = 0; k < n1; k++) 6502 if (i1[k] > PETSC_INT_MIN) break; 6503 PetscCount i1start = k; 6504 6505 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6506 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows */ 6507 6508 /* Send remote rows to their owner */ 6509 /* Find which rows should be sent to which remote ranks */ 6510 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6511 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6512 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6513 const PetscInt *ranges; 6514 PetscInt maxNsend = size >= 128 ?
128 : size; /* Assume max 128 neighbors; realloc when needed */ 6515 6516 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6517 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6518 for (k = rem; k < n1;) { 6519 PetscMPIInt owner; 6520 PetscInt firstRow, lastRow; 6521 6522 /* Locate a row range */ 6523 firstRow = i1[k]; /* first row of this owner */ 6524 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6525 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6526 6527 /* Find the first index 'p' in [k,n1) with i[p] belonging to next owner */ 6528 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6529 6530 /* All entries in [k,p) belong to this remote owner */ 6531 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6532 PetscMPIInt *sendto2; 6533 PetscInt *nentries2; 6534 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6535 6536 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6537 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6538 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6539 PetscCall(PetscFree2(sendto, nentries)); 6540 sendto = sendto2; 6541 nentries = nentries2; 6542 maxNsend = maxNsend2; 6543 } 6544 sendto[nsend] = owner; 6545 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6546 nsend++; 6547 k = p; 6548 } 6549 6550 /* Build 1st SF to know offsets on remote to send data */ 6551 PetscSF sf1; 6552 PetscInt nroots = 1, nroots2 = 0; 6553 PetscInt nleaves = nsend, nleaves2 = 0; 6554 PetscInt *offsets; 6555 PetscSFNode *iremote; 6556 6557 PetscCall(PetscSFCreate(comm, &sf1)); 6558 PetscCall(PetscMalloc1(nsend, &iremote)); 6559 PetscCall(PetscMalloc1(nsend, &offsets)); 6560 for (k = 0; k < nsend; k++) { 6561 iremote[k].rank = sendto[k]; 6562 iremote[k].index = 0; 6563 nleaves2 += nentries[k]; 6564 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6565 } 6566 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6567 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6568 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */ 6569 PetscCall(PetscSFDestroy(&sf1)); 6570 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6571 6572 /* Build 2nd SF to send remote COOs to their owner */ 6573 PetscSF sf2; 6574 nroots = nroots2; 6575 nleaves = nleaves2; 6576 PetscCall(PetscSFCreate(comm, &sf2)); 6577 PetscCall(PetscSFSetFromOptions(sf2)); 6578 PetscCall(PetscMalloc1(nleaves, &iremote)); 6579 p = 0; 6580 for (k = 0; k < nsend; k++) { 6581 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6582 for (q = 0; q < nentries[k]; q++, p++) { 6583 iremote[p].rank = sendto[k]; 6584 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6585 } 6586 } 6587 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6588 6589 /* Send the remote COOs to their owner */ 6590 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6591
PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6592 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6593 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6594 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6595 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6596 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6597 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6598 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6599 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6600 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6601 6602 PetscCall(PetscFree(offsets)); 6603 PetscCall(PetscFree2(sendto, nentries)); 6604 6605 /* Sort received COOs by row along with the permutation array */ 6606 for (k = 0; k < n2; k++) perm2[k] = k; 6607 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6608 6609 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6610 PetscCount *Cperm1; 6611 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6612 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6613 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6614 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6615 6616 /* Support for HYPRE matrices, kind of a hack. 6617 Swap min column with diagonal so that diagonal values will go first */ 6618 PetscBool hypre; 6619 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6620 if (hypre) { 6621 PetscInt *minj; 6622 PetscBT hasdiag; 6623 6624 PetscCall(PetscBTCreate(m, &hasdiag)); 6625 PetscCall(PetscMalloc1(m, &minj)); 6626 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6627 for (k = i1start; k < rem; k++) { 6628 if (j1[k] < cstart || j1[k] >= cend) continue; 6629 const PetscInt rindex = i1[k] - rstart; 6630 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6631 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6632 } 6633 for (k = 0; k < n2; k++) { 6634 if (j2[k] < cstart || j2[k] >= cend) continue; 6635 const PetscInt rindex = i2[k] - rstart; 6636 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6637 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6638 } 6639 for (k = i1start; k < rem; k++) { 6640 const PetscInt rindex = i1[k] - rstart; 6641 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6642 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6643 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6644 } 6645 for (k = 0; k < n2; k++) { 6646 const PetscInt rindex = i2[k] - rstart; 6647 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6648 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6649 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6650 } 6651 PetscCall(PetscBTDestroy(&hasdiag)); 6652 PetscCall(PetscFree(minj)); 6653 } 6654 6655 /* Split local COOs and received COOs into diag/offdiag portions */ 6656 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6657 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6658 PetscCount 
Annz1, Bnnz1, Atot1, Btot1; 6659 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6660 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6661 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6662 6663 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6664 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6665 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6666 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6667 6668 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6669 PetscInt *Ai, *Bi; 6670 PetscInt *Aj, *Bj; 6671 6672 PetscCall(PetscMalloc1(m + 1, &Ai)); 6673 PetscCall(PetscMalloc1(m + 1, &Bi)); 6674 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6675 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6676 6677 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6678 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6679 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6680 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6681 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6682 6683 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6684 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6685 6686 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6687 /* expect that nonzeros in A/B most likely have local contributing entries */ 6688 PetscInt Annz = Ai[m]; 6689 PetscInt Bnnz = Bi[m]; 6690 PetscCount *Ajmap1_new, *Bjmap1_new; 6691 6692 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6693 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6694 6695 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6696 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6697 6698 PetscCall(PetscFree(Aimap1)); 6699 PetscCall(PetscFree(Ajmap1)); 6700 PetscCall(PetscFree(Bimap1)); 6701 PetscCall(PetscFree(Bjmap1)); 6702 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6703 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6704 PetscCall(PetscFree(perm1)); 6705 PetscCall(PetscFree3(i2, j2, perm2)); 6706 6707 Ajmap1 = Ajmap1_new; 6708 Bjmap1 = Bjmap1_new; 6709 6710 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6711 if (Annz < Annz1 + Annz2) { 6712 PetscInt *Aj_new; 6713 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6714 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6715 PetscCall(PetscFree(Aj)); 6716 Aj = Aj_new; 6717 } 6718 6719 if (Bnnz < Bnnz1 + Bnnz2) { 6720 PetscInt *Bj_new; 6721 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6722 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6723 PetscCall(PetscFree(Bj)); 6724 Bj = Bj_new; 6725 } 6726 6727 /* Create new submatrices for on-process and off-process coupling */ 6728 PetscScalar *Aa, *Ba; 6729 MatType rtype; 6730 Mat_SeqAIJ *a, *b; 6731 PetscObjectState state; 6732 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6733 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6734 /* make Aj[] local, i.e., based off the start column of the diagonal portion */ 6735 if (cstart) { 6736 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6737 } 6738 6739 PetscCall(MatGetRootType_Private(mat, &rtype)); 6740 6741
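/* Rebuild the diagonal (A) and off-diagonal (B) blocks from the merged CSR arrays built above. Note B is created below with the full global column width and global column indices in Bj[]; the call to MatSetUpMultiply_MPIAIJ() further down compacts its columns and builds garray, giving the usual MPIAIJ local numbering for the off-diagonal block */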
MatSeqXAIJGetOptions_Private(mpiaij->A); 6742 PetscCall(MatDestroy(&mpiaij->A)); 6743 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6744 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6745 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6746 6747 MatSeqXAIJGetOptions_Private(mpiaij->B); 6748 PetscCall(MatDestroy(&mpiaij->B)); 6749 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6750 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6751 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6752 6753 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6754 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6755 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6756 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6757 6758 a = (Mat_SeqAIJ *)mpiaij->A->data; 6759 b = (Mat_SeqAIJ *)mpiaij->B->data; 6760 a->free_a = PETSC_TRUE; 6761 a->free_ij = PETSC_TRUE; 6762 b->free_a = PETSC_TRUE; 6763 b->free_ij = PETSC_TRUE; 6764 a->maxnz = a->nz; 6765 b->maxnz = b->nz; 6766 6767 /* conversion must happen AFTER multiply setup */ 6768 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6769 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6770 PetscCall(VecDestroy(&mpiaij->lvec)); 6771 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6772 6773 // Put the COO struct in a container and then attach that to the matrix 6774 PetscCall(PetscMalloc1(1, &coo)); 6775 coo->n = coo_n; 6776 coo->sf = sf2; 6777 coo->sendlen = nleaves; 6778 coo->recvlen = nroots; 6779 coo->Annz = Annz; 6780 coo->Bnnz = Bnnz; 6781 coo->Annz2 = Annz2; 6782 coo->Bnnz2 = Bnnz2; 6783 coo->Atot1 = Atot1; 6784 coo->Atot2 = Atot2; 6785 coo->Btot1 = Btot1; 6786 coo->Btot2 = Btot2; 6787 coo->Ajmap1 = Ajmap1; 6788 coo->Aperm1 = Aperm1; 6789 coo->Bjmap1 = Bjmap1; 6790 coo->Bperm1 = Bperm1; 6791 coo->Aimap2 = Aimap2; 6792 coo->Ajmap2 = Ajmap2; 6793 coo->Aperm2 = Aperm2; 6794 coo->Bimap2 = Bimap2; 6795 coo->Bjmap2 = Bjmap2; 6796 coo->Bperm2 = Bperm2; 6797 coo->Cperm1 = Cperm1; 6798 // Allocate in preallocation. 
If not used, they have zero cost on the host 6799 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6800 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6801 PetscCall(PetscContainerSetPointer(container, coo)); 6802 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6803 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6804 PetscCall(PetscContainerDestroy(&container)); 6805 PetscFunctionReturn(PETSC_SUCCESS); 6806 } 6807 6808 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6809 { 6810 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6811 Mat A = mpiaij->A, B = mpiaij->B; 6812 PetscScalar *Aa, *Ba; 6813 PetscScalar *sendbuf, *recvbuf; 6814 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6815 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6816 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6817 const PetscCount *Cperm1; 6818 PetscContainer container; 6819 MatCOOStruct_MPIAIJ *coo; 6820 6821 PetscFunctionBegin; 6822 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6823 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix"); 6824 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6825 sendbuf = coo->sendbuf; 6826 recvbuf = coo->recvbuf; 6827 Ajmap1 = coo->Ajmap1; 6828 Ajmap2 = coo->Ajmap2; 6829 Aimap2 = coo->Aimap2; 6830 Bjmap1 = coo->Bjmap1; 6831 Bjmap2 = coo->Bjmap2; 6832 Bimap2 = coo->Bimap2; 6833 Aperm1 = coo->Aperm1; 6834 Aperm2 = coo->Aperm2; 6835 Bperm1 = coo->Bperm1; 6836 Bperm2 = coo->Bperm2; 6837 Cperm1 = coo->Cperm1; 6838 6839 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6840 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6841 6842 /* Pack entries to be sent to remote */ 6843 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6844 6845 /* Send remote entries to their owner and overlap the communication with local computation */ 6846 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6847 /* Add local entries to A and B */ 6848 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6849 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6850 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6851 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6852 } 6853 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6854 PetscScalar sum = 0.0; 6855 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6856 Ba[i] = (imode == INSERT_VALUES ?
0.0 : Ba[i]) + sum; 6857 } 6858 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6859 6860 /* Add received remote entries to A and B */ 6861 for (PetscCount i = 0; i < coo->Annz2; i++) { 6862 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6863 } 6864 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6865 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6866 } 6867 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6868 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6869 PetscFunctionReturn(PETSC_SUCCESS); 6870 } 6871 6872 /*MC 6873 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6874 6875 Options Database Keys: 6876 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6877 6878 Level: beginner 6879 6880 Notes: 6881 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6882 in which case the values associated with the rows and columns one passes in are set to zero 6883 in the matrix 6884 6885 `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case, no 6886 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6887 6888 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6889 M*/ 6890 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6891 { 6892 Mat_MPIAIJ *b; 6893 PetscMPIInt size; 6894 6895 PetscFunctionBegin; 6896 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6897 6898 PetscCall(PetscNew(&b)); 6899 B->data = (void *)b; 6900 B->ops[0] = MatOps_Values; 6901 B->assembled = PETSC_FALSE; 6902 B->insertmode = NOT_SET_VALUES; 6903 b->size = size; 6904 6905 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6906 6907 /* build cache for off array entries formed */ 6908 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6909 6910 b->donotstash = PETSC_FALSE; 6911 b->colmap = NULL; 6912 b->garray = NULL; 6913 b->roworiented = PETSC_TRUE; 6914 6915 /* stuff used for matrix vector multiply */ 6916 b->lvec = NULL; 6917 b->Mvctx = NULL; 6918 6919 /* stuff for MatGetRow() */ 6920 b->rowindices = NULL; 6921 b->rowvalues = NULL; 6922 b->getrowactive = PETSC_FALSE; 6923 6924 /* flexible pointer used in CUSPARSE classes */ 6925 b->spptr = NULL; 6926 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6936
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6938 #if defined(PETSC_HAVE_CUDA) 6939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6940 #endif 6941 #if defined(PETSC_HAVE_HIP) 6942 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6943 #endif 6944 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6945 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6946 #endif 6947 #if defined(PETSC_HAVE_MKL_SPARSE) 6948 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6949 #endif 6950 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6951 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6952 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6953 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6954 #if defined(PETSC_HAVE_ELEMENTAL) 6955 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6956 #endif 6957 #if defined(PETSC_HAVE_SCALAPACK) 6958 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6959 #endif 6960 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6961 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6962 #if defined(PETSC_HAVE_HYPRE) 6963 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6964 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6965 #endif 6966 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6967 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6968 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6969 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6970 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6971 PetscFunctionReturn(PETSC_SUCCESS); 6972 } 6973 6974 /*@ 6975 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6976 and "off-diagonal" part of the matrix in CSR format. 6977 6978 Collective 6979 6980 Input Parameters: 6981 + comm - MPI communicator 6982 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6983 . n - This value should be the same as the local size used in creating the 6984 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have it 6985 calculated if `N` is given) For square matrices `n` is almost always `m`. 6986 .
M - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given) 6987 . N - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given) 6988 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6989 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6990 . a - matrix values 6991 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6992 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6993 - oa - matrix values 6994 6995 Output Parameter: 6996 . mat - the matrix 6997 6998 Level: advanced 6999 7000 Notes: 7001 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 7002 must free the arrays once the matrix has been destroyed and not before. 7003 7004 The `i` and `j` indices are 0 based 7005 7006 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 7007 7008 This sets local rows and cannot be used to set off-processor values. 7009 7010 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 7011 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 7012 not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because 7013 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 7014 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 7015 communication if it is known that only local entries will be set.
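As a small made-up illustration (not drawn from an actual PETSc example), consider the 4x4 matrix
.vb
  1 0 | 5 0
  0 2 | 0 6
  ----+----
  7 0 | 3 0
  0 8 | 0 4
.ve
distributed over two ranks, with rank 0 owning rows 0-1 and rank 1 owning rows 2-3 (and the matching column ranges). On rank 1 one would pass i = {0,1,2}, j = {0,1} (local indices of global columns 2 and 3), a = {3,4} for the "diagonal" part, and oi = {0,1,2}, oj = {0,1} (global column indices), oa = {7,8} for the "off-diagonal" part.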
7016
7017 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
7018 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
7019 @*/
7020 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
7021 {
7022 Mat_MPIAIJ *maij;
7023
7024 PetscFunctionBegin;
7025 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
7026 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
7027 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
7028 PetscCall(MatCreate(comm, mat));
7029 PetscCall(MatSetSizes(*mat, m, n, M, N));
7030 PetscCall(MatSetType(*mat, MATMPIAIJ));
7031 maij = (Mat_MPIAIJ *)(*mat)->data;
7032
7033 (*mat)->preallocated = PETSC_TRUE;
7034
7035 PetscCall(PetscLayoutSetUp((*mat)->rmap));
7036 PetscCall(PetscLayoutSetUp((*mat)->cmap));
7037
7038 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7039 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7040
7041 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7042 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7043 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7044 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7045 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7046 PetscFunctionReturn(PETSC_SUCCESS);
7047 }
7048
7049 typedef struct {
7050 Mat *mp; /* intermediate products */
7051 PetscBool *mptmp; /* is the intermediate product temporary? */
7052 PetscInt cp; /* number of intermediate products */
7053
7054 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7055 PetscInt *startsj_s, *startsj_r;
7056 PetscScalar *bufa;
7057 Mat P_oth;
7058
7059 /* may take advantage of merging product->B */
7060 Mat Bloc; /* local part of B obtained by merging its diag and off-diag blocks */
7061
7062 /* cusparse does not support splitting the symbolic and numeric phases.
7063 When api_user is true, we do not need to update the numerical values
7064 of the temporary storage */
7065 PetscBool reusesym;
7066
7067 /* support for COO values insertion */
7068 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars; also used as the MPI recv and send buffers, respectively */
7069 PetscInt **own; /* own[i] points to the on-process COO indices for Mat mp[i] */
7070 PetscInt **off; /* off[i] points to the off-process COO indices for Mat mp[i] */
7071 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e.
AtB or PtAP) */ 7072 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7073 PetscMemType mtype; 7074 7075 /* customization */ 7076 PetscBool abmerge; 7077 PetscBool P_oth_bind; 7078 } MatMatMPIAIJBACKEND; 7079 7080 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7081 { 7082 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7083 PetscInt i; 7084 7085 PetscFunctionBegin; 7086 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7087 PetscCall(PetscFree(mmdata->bufa)); 7088 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7089 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7090 PetscCall(MatDestroy(&mmdata->P_oth)); 7091 PetscCall(MatDestroy(&mmdata->Bloc)); 7092 PetscCall(PetscSFDestroy(&mmdata->sf)); 7093 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7094 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7095 PetscCall(PetscFree(mmdata->own[0])); 7096 PetscCall(PetscFree(mmdata->own)); 7097 PetscCall(PetscFree(mmdata->off[0])); 7098 PetscCall(PetscFree(mmdata->off)); 7099 PetscCall(PetscFree(mmdata)); 7100 PetscFunctionReturn(PETSC_SUCCESS); 7101 } 7102 7103 /* Copy selected n entries with indices in idx[] of A to v[]. 7104 If idx is NULL, copy the whole data array of A to v[] 7105 */ 7106 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7107 { 7108 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7109 7110 PetscFunctionBegin; 7111 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7112 if (f) { 7113 PetscCall((*f)(A, n, idx, v)); 7114 } else { 7115 const PetscScalar *vv; 7116 7117 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7118 if (n && idx) { 7119 PetscScalar *w = v; 7120 const PetscInt *oi = idx; 7121 PetscInt j; 7122 7123 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7124 } else { 7125 PetscCall(PetscArraycpy(v, vv, n)); 7126 } 7127 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7128 } 7129 PetscFunctionReturn(PETSC_SUCCESS); 7130 } 7131 7132 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7133 { 7134 MatMatMPIAIJBACKEND *mmdata; 7135 PetscInt i, n_d, n_o; 7136 7137 PetscFunctionBegin; 7138 MatCheckProduct(C, 1); 7139 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7140 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7141 if (!mmdata->reusesym) { /* update temporary matrices */ 7142 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7143 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7144 } 7145 mmdata->reusesym = PETSC_FALSE; 7146 7147 for (i = 0; i < mmdata->cp; i++) { 7148 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7149 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7150 } 7151 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7152 PetscInt noff; 7153 7154 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7155 if (mmdata->mptmp[i]) continue; 7156 if (noff) { 7157 PetscInt nown; 7158 7159 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7160 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], 
mmdata->coo_w + n_o)); 7161 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7162 n_o += noff; 7163 n_d += nown; 7164 } else { 7165 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7166 7167 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7168 n_d += mm->nz; 7169 } 7170 } 7171 if (mmdata->hasoffproc) { /* offprocess insertion */ 7172 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7173 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7174 } 7175 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7176 PetscFunctionReturn(PETSC_SUCCESS); 7177 } 7178 7179 /* Support for Pt * A, A * P, or Pt * A * P */ 7180 #define MAX_NUMBER_INTERMEDIATE 4 7181 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7182 { 7183 Mat_Product *product = C->product; 7184 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7185 Mat_MPIAIJ *a, *p; 7186 MatMatMPIAIJBACKEND *mmdata; 7187 ISLocalToGlobalMapping P_oth_l2g = NULL; 7188 IS glob = NULL; 7189 const char *prefix; 7190 char pprefix[256]; 7191 const PetscInt *globidx, *P_oth_idx; 7192 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7193 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7194 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7195 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7196 /* a base offset; type-2: sparse with a local to global map table */ 7197 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7198 7199 MatProductType ptype; 7200 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7201 PetscMPIInt size; 7202 7203 PetscFunctionBegin; 7204 MatCheckProduct(C, 1); 7205 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7206 ptype = product->type; 7207 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7208 ptype = MATPRODUCT_AB; 7209 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7210 } 7211 switch (ptype) { 7212 case MATPRODUCT_AB: 7213 A = product->A; 7214 P = product->B; 7215 m = A->rmap->n; 7216 n = P->cmap->n; 7217 M = A->rmap->N; 7218 N = P->cmap->N; 7219 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7220 break; 7221 case MATPRODUCT_AtB: 7222 P = product->A; 7223 A = product->B; 7224 m = P->cmap->n; 7225 n = A->cmap->n; 7226 M = P->cmap->N; 7227 N = A->cmap->N; 7228 hasoffproc = PETSC_TRUE; 7229 break; 7230 case MATPRODUCT_PtAP: 7231 A = product->A; 7232 P = product->B; 7233 m = P->cmap->n; 7234 n = P->cmap->n; 7235 M = P->cmap->N; 7236 N = P->cmap->N; 7237 hasoffproc = PETSC_TRUE; 7238 break; 7239 default: 7240 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7241 } 7242 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7243 if (size == 1) hasoffproc = PETSC_FALSE; 7244 7245 /* defaults */ 7246 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7247 mp[i] = NULL; 7248 mptmp[i] = PETSC_FALSE; 7249 rmapt[i] = -1; 7250 cmapt[i] = -1; 7251 rmapa[i] = NULL; 7252 cmapa[i] = NULL; 7253 } 7254 7255 /* customization */ 7256 PetscCall(PetscNew(&mmdata)); 7257 mmdata->reusesym = product->api_user; 7258 if (ptype == 
MATPRODUCT_AB) { 7259 if (product->api_user) { 7260 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7261 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7262 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7263 PetscOptionsEnd(); 7264 } else { 7265 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7266 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7267 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7268 PetscOptionsEnd(); 7269 } 7270 } else if (ptype == MATPRODUCT_PtAP) { 7271 if (product->api_user) { 7272 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7273 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7274 PetscOptionsEnd(); 7275 } else { 7276 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7277 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7278 PetscOptionsEnd(); 7279 } 7280 } 7281 a = (Mat_MPIAIJ *)A->data; 7282 p = (Mat_MPIAIJ *)P->data; 7283 PetscCall(MatSetSizes(C, m, n, M, N)); 7284 PetscCall(PetscLayoutSetUp(C->rmap)); 7285 PetscCall(PetscLayoutSetUp(C->cmap)); 7286 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7287 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7288 7289 cp = 0; 7290 switch (ptype) { 7291 case MATPRODUCT_AB: /* A * P */ 7292 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7293 7294 /* A_diag * P_local (merged or not) */ 7295 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7296 /* P is product->B */ 7297 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7298 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7299 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7300 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7301 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7302 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7303 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7304 mp[cp]->product->api_user = product->api_user; 7305 PetscCall(MatProductSetFromOptions(mp[cp])); 7306 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7307 PetscCall(ISGetIndices(glob, &globidx)); 7308 rmapt[cp] = 1; 7309 cmapt[cp] = 2; 7310 cmapa[cp] = globidx; 7311 mptmp[cp] = PETSC_FALSE; 7312 cp++; 7313 } else { /* A_diag * P_diag and A_diag * P_off */ 7314 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7315 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7316 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7317 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7318 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7319 PetscCall(MatAppendOptionsPrefix(mp[cp], 
pprefix)); 7320 mp[cp]->product->api_user = product->api_user; 7321 PetscCall(MatProductSetFromOptions(mp[cp])); 7322 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7323 rmapt[cp] = 1; 7324 cmapt[cp] = 1; 7325 mptmp[cp] = PETSC_FALSE; 7326 cp++; 7327 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7328 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7329 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7330 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7331 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7332 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7333 mp[cp]->product->api_user = product->api_user; 7334 PetscCall(MatProductSetFromOptions(mp[cp])); 7335 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7336 rmapt[cp] = 1; 7337 cmapt[cp] = 2; 7338 cmapa[cp] = p->garray; 7339 mptmp[cp] = PETSC_FALSE; 7340 cp++; 7341 } 7342 7343 /* A_off * P_other */ 7344 if (mmdata->P_oth) { 7345 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7346 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7347 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7348 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7349 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7350 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7351 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7352 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7353 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7354 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7355 mp[cp]->product->api_user = product->api_user; 7356 PetscCall(MatProductSetFromOptions(mp[cp])); 7357 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7358 rmapt[cp] = 1; 7359 cmapt[cp] = 2; 7360 cmapa[cp] = P_oth_idx; 7361 mptmp[cp] = PETSC_FALSE; 7362 cp++; 7363 } 7364 break; 7365 7366 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7367 /* A is product->B */ 7368 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7369 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7370 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7371 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7372 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7373 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7374 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7375 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7376 mp[cp]->product->api_user = product->api_user; 7377 PetscCall(MatProductSetFromOptions(mp[cp])); 7378 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7379 PetscCall(ISGetIndices(glob, &globidx)); 7380 rmapt[cp] = 2; 7381 rmapa[cp] = globidx; 7382 cmapt[cp] = 2; 7383 cmapa[cp] = globidx; 7384 mptmp[cp] = PETSC_FALSE; 7385 cp++; 7386 } else { 7387 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7388 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7389 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7390 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7391 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7392 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7393 mp[cp]->product->api_user = product->api_user; 7394 PetscCall(MatProductSetFromOptions(mp[cp])); 7395 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 
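/* this product (p->A)^T * Bloc, i.e. P_diag^T times the merged local rows of A, fills locally owned rows of C, hence the
   type-1 row map set below; its columns follow Bloc's merged local ordering and are translated to global indices through
   glob, hence the type-2 column map */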
7396 PetscCall(ISGetIndices(glob, &globidx)); 7397 rmapt[cp] = 1; 7398 cmapt[cp] = 2; 7399 cmapa[cp] = globidx; 7400 mptmp[cp] = PETSC_FALSE; 7401 cp++; 7402 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7403 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7404 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7405 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7406 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7407 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7408 mp[cp]->product->api_user = product->api_user; 7409 PetscCall(MatProductSetFromOptions(mp[cp])); 7410 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7411 rmapt[cp] = 2; 7412 rmapa[cp] = p->garray; 7413 cmapt[cp] = 2; 7414 cmapa[cp] = globidx; 7415 mptmp[cp] = PETSC_FALSE; 7416 cp++; 7417 } 7418 break; 7419 case MATPRODUCT_PtAP: 7420 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7421 /* P is product->B */ 7422 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7423 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7424 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7425 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7426 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7427 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7428 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7429 mp[cp]->product->api_user = product->api_user; 7430 PetscCall(MatProductSetFromOptions(mp[cp])); 7431 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7432 PetscCall(ISGetIndices(glob, &globidx)); 7433 rmapt[cp] = 2; 7434 rmapa[cp] = globidx; 7435 cmapt[cp] = 2; 7436 cmapa[cp] = globidx; 7437 mptmp[cp] = PETSC_FALSE; 7438 cp++; 7439 if (mmdata->P_oth) { 7440 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7441 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7442 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7443 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7444 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7445 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7446 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7447 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7448 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7449 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7450 mp[cp]->product->api_user = product->api_user; 7451 PetscCall(MatProductSetFromOptions(mp[cp])); 7452 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7453 mptmp[cp] = PETSC_TRUE; 7454 cp++; 7455 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7456 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7457 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7458 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7459 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7460 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7461 mp[cp]->product->api_user = product->api_user; 7462 PetscCall(MatProductSetFromOptions(mp[cp])); 7463 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7464 rmapt[cp] = 2; 7465 rmapa[cp] = globidx; 7466 cmapt[cp] = 2; 7467 cmapa[cp] = P_oth_idx; 7468 mptmp[cp] = PETSC_FALSE; 7469 cp++; 7470 } 7471 break; 7472 default: 7473 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, 
"Not for product type %s", MatProductTypes[ptype]); 7474 } 7475 /* sanity check */ 7476 if (size > 1) 7477 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7478 7479 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7480 for (i = 0; i < cp; i++) { 7481 mmdata->mp[i] = mp[i]; 7482 mmdata->mptmp[i] = mptmp[i]; 7483 } 7484 mmdata->cp = cp; 7485 C->product->data = mmdata; 7486 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7487 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7488 7489 /* memory type */ 7490 mmdata->mtype = PETSC_MEMTYPE_HOST; 7491 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7492 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7493 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7494 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7495 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7496 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7497 7498 /* prepare coo coordinates for values insertion */ 7499 7500 /* count total nonzeros of those intermediate seqaij Mats 7501 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7502 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7503 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7504 */ 7505 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7506 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7507 if (mptmp[cp]) continue; 7508 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7509 const PetscInt *rmap = rmapa[cp]; 7510 const PetscInt mr = mp[cp]->rmap->n; 7511 const PetscInt rs = C->rmap->rstart; 7512 const PetscInt re = C->rmap->rend; 7513 const PetscInt *ii = mm->i; 7514 for (i = 0; i < mr; i++) { 7515 const PetscInt gr = rmap[i]; 7516 const PetscInt nz = ii[i + 1] - ii[i]; 7517 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7518 else ncoo_oown += nz; /* this row is local */ 7519 } 7520 } else ncoo_d += mm->nz; 7521 } 7522 7523 /* 7524 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7525 7526 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7527 7528 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7529 7530 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7531 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7532 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7533 7534 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7535 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 
7536 */ 7537 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7538 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7539 7540 /* gather (i,j) of nonzeros inserted by remote procs */ 7541 if (hasoffproc) { 7542 PetscSF msf; 7543 PetscInt ncoo2, *coo_i2, *coo_j2; 7544 7545 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7546 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7547 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7548 7549 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7550 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7551 PetscInt *idxoff = mmdata->off[cp]; 7552 PetscInt *idxown = mmdata->own[cp]; 7553 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7554 const PetscInt *rmap = rmapa[cp]; 7555 const PetscInt *cmap = cmapa[cp]; 7556 const PetscInt *ii = mm->i; 7557 PetscInt *coi = coo_i + ncoo_o; 7558 PetscInt *coj = coo_j + ncoo_o; 7559 const PetscInt mr = mp[cp]->rmap->n; 7560 const PetscInt rs = C->rmap->rstart; 7561 const PetscInt re = C->rmap->rend; 7562 const PetscInt cs = C->cmap->rstart; 7563 for (i = 0; i < mr; i++) { 7564 const PetscInt *jj = mm->j + ii[i]; 7565 const PetscInt gr = rmap[i]; 7566 const PetscInt nz = ii[i + 1] - ii[i]; 7567 if (gr < rs || gr >= re) { /* this is an offproc row */ 7568 for (j = ii[i]; j < ii[i + 1]; j++) { 7569 *coi++ = gr; 7570 *idxoff++ = j; 7571 } 7572 if (!cmapt[cp]) { /* already global */ 7573 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7574 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7575 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7576 } else { /* offdiag */ 7577 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7578 } 7579 ncoo_o += nz; 7580 } else { /* this is a local row */ 7581 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7582 } 7583 } 7584 } 7585 mmdata->off[cp + 1] = idxoff; 7586 mmdata->own[cp + 1] = idxown; 7587 } 7588 7589 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7590 PetscInt incoo_o; 7591 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7592 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7593 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7594 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7595 ncoo = ncoo_d + ncoo_oown + ncoo2; 7596 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7597 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7598 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7599 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7600 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7601 PetscCall(PetscFree2(coo_i, coo_j)); 7602 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7603 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7604 coo_i = coo_i2; 7605 coo_j = coo_j2; 7606 } else { /* no offproc values insertion */ 7607 ncoo = ncoo_d; 7608 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7609 7610 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7611 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7612 PetscCall(PetscSFSetUp(mmdata->sf)); 7613 } 7614 
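/* record whether off-process insertion is needed; MatProductNumeric_MPIAIJBACKEND checks this flag to decide whether to
   gather remote values through mmdata->sf before calling MatSetValuesCOO() */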
mmdata->hasoffproc = hasoffproc; 7615 7616 /* gather (i,j) of nonzeros inserted locally */ 7617 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7618 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7619 PetscInt *coi = coo_i + ncoo_d; 7620 PetscInt *coj = coo_j + ncoo_d; 7621 const PetscInt *jj = mm->j; 7622 const PetscInt *ii = mm->i; 7623 const PetscInt *cmap = cmapa[cp]; 7624 const PetscInt *rmap = rmapa[cp]; 7625 const PetscInt mr = mp[cp]->rmap->n; 7626 const PetscInt rs = C->rmap->rstart; 7627 const PetscInt re = C->rmap->rend; 7628 const PetscInt cs = C->cmap->rstart; 7629 7630 if (mptmp[cp]) continue; 7631 if (rmapt[cp] == 1) { /* consecutive rows */ 7632 /* fill coo_i */ 7633 for (i = 0; i < mr; i++) { 7634 const PetscInt gr = i + rs; 7635 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7636 } 7637 /* fill coo_j */ 7638 if (!cmapt[cp]) { /* type-0, already global */ 7639 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7640 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7641 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7642 } else { /* type-2, local to global for sparse columns */ 7643 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7644 } 7645 ncoo_d += mm->nz; 7646 } else if (rmapt[cp] == 2) { /* sparse rows */ 7647 for (i = 0; i < mr; i++) { 7648 const PetscInt *jj = mm->j + ii[i]; 7649 const PetscInt gr = rmap[i]; 7650 const PetscInt nz = ii[i + 1] - ii[i]; 7651 if (gr >= rs && gr < re) { /* local rows */ 7652 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7653 if (!cmapt[cp]) { /* type-0, already global */ 7654 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7655 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7656 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7657 } else { /* type-2, local to global for sparse columns */ 7658 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7659 } 7660 ncoo_d += nz; 7661 } 7662 } 7663 } 7664 } 7665 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7666 PetscCall(ISDestroy(&glob)); 7667 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7668 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7669 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7670 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7671 7672 /* set block sizes */ 7673 A = product->A; 7674 P = product->B; 7675 switch (ptype) { 7676 case MATPRODUCT_PtAP: 7677 if (P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs)); 7678 break; 7679 case MATPRODUCT_RARt: 7680 if (P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs)); 7681 break; 7682 case MATPRODUCT_ABC: 7683 PetscCall(MatSetBlockSizesFromMats(C, A, product->C)); 7684 break; 7685 case MATPRODUCT_AB: 7686 PetscCall(MatSetBlockSizesFromMats(C, A, P)); 7687 break; 7688 case MATPRODUCT_AtB: 7689 if (A->cmap->bs > 1 || P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs)); 7690 break; 7691 case MATPRODUCT_ABt: 7692 if (A->rmap->bs > 1 || P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs)); 7693 break; 7694 default: 7695 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]); 7696 } 7697 7698 /* preallocate with COO data */ 7699 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7700 PetscCall(PetscFree2(coo_i, coo_j)); 7701 PetscFunctionReturn(PETSC_SUCCESS); 7702 
} 7703 7704 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7705 { 7706 Mat_Product *product = mat->product; 7707 #if defined(PETSC_HAVE_DEVICE) 7708 PetscBool match = PETSC_FALSE; 7709 PetscBool usecpu = PETSC_FALSE; 7710 #else 7711 PetscBool match = PETSC_TRUE; 7712 #endif 7713 7714 PetscFunctionBegin; 7715 MatCheckProduct(mat, 1); 7716 #if defined(PETSC_HAVE_DEVICE) 7717 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7718 if (match) { /* we can always fallback to the CPU if requested */ 7719 switch (product->type) { 7720 case MATPRODUCT_AB: 7721 if (product->api_user) { 7722 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7723 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7724 PetscOptionsEnd(); 7725 } else { 7726 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7727 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7728 PetscOptionsEnd(); 7729 } 7730 break; 7731 case MATPRODUCT_AtB: 7732 if (product->api_user) { 7733 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7734 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7735 PetscOptionsEnd(); 7736 } else { 7737 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7738 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7739 PetscOptionsEnd(); 7740 } 7741 break; 7742 case MATPRODUCT_PtAP: 7743 if (product->api_user) { 7744 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7745 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7746 PetscOptionsEnd(); 7747 } else { 7748 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7749 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7750 PetscOptionsEnd(); 7751 } 7752 break; 7753 default: 7754 break; 7755 } 7756 match = (PetscBool)!usecpu; 7757 } 7758 #endif 7759 if (match) { 7760 switch (product->type) { 7761 case MATPRODUCT_AB: 7762 case MATPRODUCT_AtB: 7763 case MATPRODUCT_PtAP: 7764 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7765 break; 7766 default: 7767 break; 7768 } 7769 } 7770 /* fallback to MPIAIJ ops */ 7771 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7772 PetscFunctionReturn(PETSC_SUCCESS); 7773 } 7774 7775 /* 7776 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7777 7778 n - the number of block indices in cc[] 7779 cc - the block indices (must be large enough to contain the indices) 7780 */ 7781 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7782 { 7783 PetscInt cnt = -1, nidx, j; 7784 const PetscInt *idx; 7785 7786 PetscFunctionBegin; 7787 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7788 if (nidx) { 7789 cnt = 0; 7790 cc[cnt] = idx[0] / bs; 7791 
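/* AIJ rows store their column indices in increasing order, so a new block index appears exactly when idx[j] / bs exceeds
   the last block index recorded in cc[] */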
for (j = 1; j < nidx; j++) {
7792 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7793 }
7794 }
7795 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7796 *n = cnt + 1;
7797 PetscFunctionReturn(PETSC_SUCCESS);
7798 }
7799
7800 /*
7801 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7802
7803 ncollapsed - the number of block indices
7804 collapsed - the block indices (must be large enough to contain the indices)
7805 */
7806 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7807 {
7808 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7809
7810 PetscFunctionBegin;
7811 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7812 for (i = start + 1; i < start + bs; i++) {
7813 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7814 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7815 cprevtmp = cprev;
7816 cprev = merged;
7817 merged = cprevtmp;
7818 }
7819 *ncollapsed = nprev;
7820 if (collapsed) *collapsed = cprev;
7821 PetscFunctionReturn(PETSC_SUCCESS);
7822 }
7823
7824 /*
7825 MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7826
7827 Input Parameters:
7828 + Amat - matrix
7829 . symmetrize - make the result symmetric
7830 . scale - scale with diagonal
. filter - filter out entries below this magnitude (a negative value means no filtering)
. index_size - size of index[]
- index - if index_size > 0, the rows/columns within each block used to compute the block value
7831
7832 Output Parameter:
7833 . a_Gmat - output scalar graph >= 0
7834
7835 */
7836 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7837 {
7838 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7839 MPI_Comm comm;
7840 Mat Gmat;
7841 PetscBool ismpiaij, isseqaij;
7842 Mat a, b, c;
7843 MatType jtype;
7844
7845 PetscFunctionBegin;
7846 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7847 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7848 PetscCall(MatGetSize(Amat, &MM, &NN));
7849 PetscCall(MatGetBlockSize(Amat, &bs));
7850 nloc = (Iend - Istart) / bs;
7851
7852 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7853 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7854 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7855
7856 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7857 /* One solution is to provide a new API, MatAIJGetCollapsedAIJ, for which each class can provide a fast
7858 implementation */
7859 if (bs > 1) {
7860 PetscCall(MatGetType(Amat, &jtype));
7861 PetscCall(MatCreate(comm, &Gmat));
7862 PetscCall(MatSetType(Gmat, jtype));
7863 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7864 PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7865 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7866 PetscInt *d_nnz, *o_nnz;
7867 MatScalar *aa, val, *AA;
7868 PetscInt *aj, *ai, *AJ, nc, nmax = 0;
7869
7870 if (isseqaij) {
7871 a = Amat;
7872 b = NULL;
7873 } else {
7874 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7875 a = d->A;
7876 b = d->B;
7877 }
7878 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7879 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7880 for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7881 PetscInt *nnz = (c == a) ?
d_nnz : o_nnz; 7882 const PetscInt *cols1, *cols2; 7883 7884 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7885 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7886 nnz[brow / bs] = nc2 / bs; 7887 if (nc2 % bs) ok = 0; 7888 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7889 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7890 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7891 if (nc1 != nc2) ok = 0; 7892 else { 7893 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7894 if (cols1[jj] != cols2[jj]) ok = 0; 7895 if (cols1[jj] % bs != jj % bs) ok = 0; 7896 } 7897 } 7898 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7899 } 7900 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7901 if (!ok) { 7902 PetscCall(PetscFree2(d_nnz, o_nnz)); 7903 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7904 goto old_bs; 7905 } 7906 } 7907 } 7908 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7909 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7910 PetscCall(PetscFree2(d_nnz, o_nnz)); 7911 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7912 // diag 7913 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7914 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7915 7916 ai = aseq->i; 7917 n = ai[brow + 1] - ai[brow]; 7918 aj = aseq->j + ai[brow]; 7919 for (PetscInt k = 0; k < n; k += bs) { // block columns 7920 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7921 val = 0; 7922 if (index_size == 0) { 7923 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7924 aa = aseq->a + ai[brow + ii] + k; 7925 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7926 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7927 } 7928 } 7929 } else { // use (index,index) value if provided 7930 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7931 PetscInt ii = index[iii]; 7932 aa = aseq->a + ai[brow + ii] + k; 7933 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7934 PetscInt jj = index[jjj]; 7935 val += PetscAbs(PetscRealPart(aa[jj])); 7936 } 7937 } 7938 } 7939 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7940 AA[k / bs] = val; 7941 } 7942 grow = Istart / bs + brow / bs; 7943 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7944 } 7945 // off-diag 7946 if (ismpiaij) { 7947 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7948 const PetscScalar *vals; 7949 const PetscInt *cols, *garray = aij->garray; 7950 7951 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7952 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7953 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7954 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7955 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7956 AA[k / bs] = 0; 7957 AJ[cidx] = garray[cols[k]] / bs; 7958 } 7959 nc = ncols / bs; 7960 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7961 if (index_size == 0) { 7962 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7963 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7964 for (PetscInt k = 0; k < ncols; k += bs) { 7965 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7966 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7967 AA[k / bs] 
+= PetscAbs(PetscRealPart(vals[k + jj])); 7968 } 7969 } 7970 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7971 } 7972 } else { // use (index,index) value if provided 7973 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7974 PetscInt ii = index[iii]; 7975 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7976 for (PetscInt k = 0; k < ncols; k += bs) { 7977 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7978 PetscInt jj = index[jjj]; 7979 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7980 } 7981 } 7982 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7983 } 7984 } 7985 grow = Istart / bs + brow / bs; 7986 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7987 } 7988 } 7989 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7990 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7991 PetscCall(PetscFree2(AA, AJ)); 7992 } else { 7993 const PetscScalar *vals; 7994 const PetscInt *idx; 7995 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7996 old_bs: 7997 /* 7998 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7999 */ 8000 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 8001 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 8002 if (isseqaij) { 8003 PetscInt max_d_nnz; 8004 8005 /* 8006 Determine exact preallocation count for (sequential) scalar matrix 8007 */ 8008 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 8009 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 8010 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 8011 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 8012 PetscCall(PetscFree3(w0, w1, w2)); 8013 } else if (ismpiaij) { 8014 Mat Daij, Oaij; 8015 const PetscInt *garray; 8016 PetscInt max_d_nnz; 8017 8018 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 8019 /* 8020 Determine exact preallocation count for diagonal block portion of scalar matrix 8021 */ 8022 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 8023 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 8024 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 8025 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 8026 PetscCall(PetscFree3(w0, w1, w2)); 8027 /* 8028 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 8029 */ 8030 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 8031 o_nnz[jj] = 0; 8032 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 8033 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 8034 o_nnz[jj] += ncols; 8035 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 8036 } 8037 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 8038 } 8039 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 8040 /* get scalar copy (norms) of matrix */ 8041 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 8042 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 8043 PetscCall(PetscFree2(d_nnz, o_nnz)); 8044 for (Ii = Istart; Ii < Iend; Ii++) { 8045 PetscInt dest_row = Ii / bs; 8046 8047 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 8048 for (jj = 0; jj < ncols; jj++) { 8049 PetscInt dest_col = idx[jj] / bs; 8050 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 8051 8052 PetscCall(MatSetValues(Gmat, 1, 
&dest_row, 1, &dest_col, &sv, ADD_VALUES)); 8053 } 8054 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 8055 } 8056 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8057 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8058 } 8059 } else { 8060 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8061 else { 8062 Gmat = Amat; 8063 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8064 } 8065 if (isseqaij) { 8066 a = Gmat; 8067 b = NULL; 8068 } else { 8069 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8070 a = d->A; 8071 b = d->B; 8072 } 8073 if (filter >= 0 || scale) { 8074 /* take absolute value of each entry */ 8075 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8076 MatInfo info; 8077 PetscScalar *avals; 8078 8079 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8080 PetscCall(MatSeqAIJGetArray(c, &avals)); 8081 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8082 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8083 } 8084 } 8085 } 8086 if (symmetrize) { 8087 PetscBool isset, issym; 8088 8089 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8090 if (!isset || !issym) { 8091 Mat matTrans; 8092 8093 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8094 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8095 PetscCall(MatDestroy(&matTrans)); 8096 } 8097 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8098 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8099 if (scale) { 8100 /* scale c for all diagonal values = 1 or -1 */ 8101 Vec diag; 8102 8103 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8104 PetscCall(MatGetDiagonal(Gmat, diag)); 8105 PetscCall(VecReciprocal(diag)); 8106 PetscCall(VecSqrtAbs(diag)); 8107 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8108 PetscCall(VecDestroy(&diag)); 8109 } 8110 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8111 if (filter >= 0) { 8112 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8113 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8114 } 8115 *a_Gmat = Gmat; 8116 PetscFunctionReturn(PETSC_SUCCESS); 8117 } 8118 8119 /* 8120 Special version for direct calls from Fortran 8121 */ 8122 8123 /* Change these macros so can be used in void function */ 8124 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8125 #undef PetscCall 8126 #define PetscCall(...) \ 8127 do { \ 8128 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8129 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8130 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8131 return; \ 8132 } \ 8133 } while (0) 8134 8135 #undef SETERRQ 8136 #define SETERRQ(comm, ierr, ...) 
\ 8137 do { \ 8138 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8139 return; \ 8140 } while (0) 8141 8142 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8143 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8144 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8145 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8146 #else 8147 #endif 8148 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8149 { 8150 Mat mat = *mmat; 8151 PetscInt m = *mm, n = *mn; 8152 InsertMode addv = *maddv; 8153 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8154 PetscScalar value; 8155 8156 MatCheckPreallocated(mat, 1); 8157 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8158 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8159 { 8160 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8161 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8162 PetscBool roworiented = aij->roworiented; 8163 8164 /* Some Variables required in the macro */ 8165 Mat A = aij->A; 8166 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8167 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8168 MatScalar *aa; 8169 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8170 Mat B = aij->B; 8171 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8172 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8173 MatScalar *ba; 8174 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8175 * cannot use "#if defined" inside a macro. 
*/ 8176 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8177 8178 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8179 PetscInt nonew = a->nonew; 8180 MatScalar *ap1, *ap2; 8181 8182 PetscFunctionBegin; 8183 PetscCall(MatSeqAIJGetArray(A, &aa)); 8184 PetscCall(MatSeqAIJGetArray(B, &ba)); 8185 for (i = 0; i < m; i++) { 8186 if (im[i] < 0) continue; 8187 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8188 if (im[i] >= rstart && im[i] < rend) { 8189 row = im[i] - rstart; 8190 lastcol1 = -1; 8191 rp1 = aj + ai[row]; 8192 ap1 = aa + ai[row]; 8193 rmax1 = aimax[row]; 8194 nrow1 = ailen[row]; 8195 low1 = 0; 8196 high1 = nrow1; 8197 lastcol2 = -1; 8198 rp2 = bj + bi[row]; 8199 ap2 = ba + bi[row]; 8200 rmax2 = bimax[row]; 8201 nrow2 = bilen[row]; 8202 low2 = 0; 8203 high2 = nrow2; 8204 8205 for (j = 0; j < n; j++) { 8206 if (roworiented) value = v[i * n + j]; 8207 else value = v[i + j * m]; 8208 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8209 if (in[j] >= cstart && in[j] < cend) { 8210 col = in[j] - cstart; 8211 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8212 } else if (in[j] < 0) continue; 8213 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8214 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8215 } else { 8216 if (mat->was_assembled) { 8217 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8218 #if defined(PETSC_USE_CTABLE) 8219 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8220 col--; 8221 #else 8222 col = aij->colmap[in[j]] - 1; 8223 #endif 8224 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8225 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8226 col = in[j]; 8227 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8228 B = aij->B; 8229 b = (Mat_SeqAIJ *)B->data; 8230 bimax = b->imax; 8231 bi = b->i; 8232 bilen = b->ilen; 8233 bj = b->j; 8234 rp2 = bj + bi[row]; 8235 ap2 = ba + bi[row]; 8236 rmax2 = bimax[row]; 8237 nrow2 = bilen[row]; 8238 low2 = 0; 8239 high2 = nrow2; 8240 bm = aij->B->rmap->n; 8241 ba = b->a; 8242 inserted = PETSC_FALSE; 8243 } 8244 } else col = in[j]; 8245 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8246 } 8247 } 8248 } else if (!aij->donotstash) { 8249 if (roworiented) { 8250 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8251 } else { 8252 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8253 } 8254 } 8255 } 8256 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8257 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8258 } 8259 PetscFunctionReturnVoid(); 8260 } 8261 8262 /* Undefining these here since they were redefined from their original definition above! No 8263 * other PETSc functions should be defined past this point, as it is impossible to recover the 8264 * original definitions */ 8265 #undef PetscCall 8266 #undef SETERRQ 8267