1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 10 #define TYPE AIJ 11 #define TYPE_AIJ 12 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 13 #undef TYPE 14 #undef TYPE_AIJ 15 16 static PetscErrorCode MatReset_MPIAIJ(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 19 20 PetscFunctionBegin; 21 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 22 PetscCall(MatStashDestroy_Private(&mat->stash)); 23 PetscCall(VecDestroy(&aij->diag)); 24 PetscCall(MatDestroy(&aij->A)); 25 PetscCall(MatDestroy(&aij->B)); 26 #if defined(PETSC_USE_CTABLE) 27 PetscCall(PetscHMapIDestroy(&aij->colmap)); 28 #else 29 PetscCall(PetscFree(aij->colmap)); 30 #endif 31 PetscCall(PetscFree(aij->garray)); 32 PetscCall(VecDestroy(&aij->lvec)); 33 PetscCall(VecScatterDestroy(&aij->Mvctx)); 34 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 35 PetscCall(PetscFree(aij->ld)); 36 PetscFunctionReturn(PETSC_SUCCESS); 37 } 38 39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 42 /* Save the nonzero states of the component matrices because those are what are used to determine 43 the nonzero state of mat */ 44 PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate; 45 46 PetscFunctionBegin; 47 PetscCall(MatReset_MPIAIJ(mat)); 48 PetscCall(MatSetUp_MPI_Hash(mat)); 49 aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate; 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 54 { 55 PetscFunctionBegin; 56 PetscCall(MatReset_MPIAIJ(mat)); 57 58 PetscCall(PetscFree(mat->data)); 59 60 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 61 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 62 63 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 74 #if defined(PETSC_HAVE_CUDA) 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 76 #endif 77 #if defined(PETSC_HAVE_HIP) 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 79 #endif 80 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. 
   As a result, for single process communicators, `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically
   switches over to using inode routines when enough inodes exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix.
*/ 186 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 187 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 192 { 193 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 194 195 PetscFunctionBegin; 196 if (mat->A) { 197 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 198 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 199 } 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 204 { 205 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 207 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 208 const PetscInt *ia, *ib; 209 const MatScalar *aa, *bb, *aav, *bav; 210 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 211 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 212 213 PetscFunctionBegin; 214 *keptrows = NULL; 215 216 ia = a->i; 217 ib = b->i; 218 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 220 for (i = 0; i < m; i++) { 221 na = ia[i + 1] - ia[i]; 222 nb = ib[i + 1] - ib[i]; 223 if (!na && !nb) { 224 cnt++; 225 goto ok1; 226 } 227 aa = aav + ia[i]; 228 for (j = 0; j < na; j++) { 229 if (aa[j] != 0.0) goto ok1; 230 } 231 bb = PetscSafePointerPlusOffset(bav, ib[i]); 232 for (j = 0; j < nb; j++) { 233 if (bb[j] != 0.0) goto ok1; 234 } 235 cnt++; 236 ok1:; 237 } 238 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 239 if (!n0rows) { 240 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 241 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 242 PetscFunctionReturn(PETSC_SUCCESS); 243 } 244 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 245 cnt = 0; 246 for (i = 0; i < m; i++) { 247 na = ia[i + 1] - ia[i]; 248 nb = ib[i + 1] - ib[i]; 249 if (!na && !nb) continue; 250 aa = aav + ia[i]; 251 for (j = 0; j < na; j++) { 252 if (aa[j] != 0.0) { 253 rows[cnt++] = rstart + i; 254 goto ok2; 255 } 256 } 257 bb = PetscSafePointerPlusOffset(bav, ib[i]); 258 for (j = 0; j < nb; j++) { 259 if (bb[j] != 0.0) { 260 rows[cnt++] = rstart + i; 261 goto ok2; 262 } 263 } 264 ok2:; 265 } 266 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 267 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 268 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 275 PetscBool cong; 276 277 PetscFunctionBegin; 278 PetscCall(MatHasCongruentLayouts(Y, &cong)); 279 if (Y->assembled && cong) { 280 PetscCall(MatDiagonalSet(aij->A, D, is)); 281 } else { 282 PetscCall(MatDiagonalSet_Default(Y, D, is)); 283 } 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 288 { 289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 290 PetscInt i, rstart, nrows, *rows; 291 292 PetscFunctionBegin; 293 *zrows = NULL; 294 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 295 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 296 for (i = 0; i < nrows; i++) rows[i] += rstart; 297 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal 
*reductions) 302 { 303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 304 PetscInt i, m, n, *garray = aij->garray; 305 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 306 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 307 PetscReal *work; 308 const PetscScalar *dummy; 309 310 PetscFunctionBegin; 311 PetscCall(MatGetSize(A, &m, &n)); 312 PetscCall(PetscCalloc1(n, &work)); 313 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 314 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 315 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 316 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 317 if (type == NORM_2) { 318 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 319 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 320 } else if (type == NORM_1) { 321 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 322 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 323 } else if (type == NORM_INFINITY) { 324 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 325 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 326 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 327 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 328 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 330 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 331 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 332 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 333 if (type == NORM_INFINITY) { 334 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 335 } else { 336 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 337 } 338 PetscCall(PetscFree(work)); 339 if (type == NORM_2) { 340 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 341 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 342 for (i = 0; i < n; i++) reductions[i] /= m; 343 } 344 PetscFunctionReturn(PETSC_SUCCESS); 345 } 346 347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 348 { 349 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 350 IS sis, gis; 351 const PetscInt *isis, *igis; 352 PetscInt n, *iis, nsis, ngis, rstart, i; 353 354 PetscFunctionBegin; 355 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 356 PetscCall(MatFindNonzeroRows(a->B, &gis)); 357 PetscCall(ISGetSize(gis, &ngis)); 358 PetscCall(ISGetSize(sis, &nsis)); 359 PetscCall(ISGetIndices(sis, &isis)); 360 PetscCall(ISGetIndices(gis, &igis)); 361 362 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 363 PetscCall(PetscArraycpy(iis, igis, ngis)); 364 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 365 n = ngis + nsis; 366 
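  /* sort the gathered row indices and remove duplicates; PetscSortRemoveDupsInt() updates n to the number of unique entries */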
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  holds an order-N integer array), although that array is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether the LogFlops call slows down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
} \ 467 if (value == 0.0 && ignorezeroentries) { \ 468 low2 = 0; \ 469 high2 = nrow2; \ 470 goto b_noinsert; \ 471 } \ 472 if (nonew == 1) { \ 473 low2 = 0; \ 474 high2 = nrow2; \ 475 goto b_noinsert; \ 476 } \ 477 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 478 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 479 N = nrow2++ - 1; \ 480 b->nz++; \ 481 high2++; \ 482 /* shift up all the later entries in this row */ \ 483 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 484 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 485 rp2[_i] = col; \ 486 ap2[_i] = value; \ 487 b_noinsert:; \ 488 bilen[row] = nrow2; \ 489 } while (0) 490 491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 492 { 493 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 494 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 495 PetscInt l, *garray = mat->garray, diag; 496 PetscScalar *aa, *ba; 497 498 PetscFunctionBegin; 499 /* code only works for square matrices A */ 500 501 /* find size of row to the left of the diagonal part */ 502 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 503 row = row - diag; 504 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 505 if (garray[b->j[b->i[row] + l]] > diag) break; 506 } 507 if (l) { 508 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 509 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 510 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 511 } 512 513 /* diagonal part */ 514 if (a->i[row + 1] - a->i[row]) { 515 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 516 PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 517 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 518 } 519 520 /* right of diagonal part */ 521 if (b->i[row + 1] - b->i[row] - l) { 522 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 523 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 524 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 525 } 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 532 PetscScalar value = 0.0; 533 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 540 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 541 PetscBool ignorezeroentries = a->ignorezeroentries; 542 Mat B = aij->B; 543 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 544 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 545 MatScalar *aa, *ba; 546 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 547 PetscInt nonew; 548 MatScalar *ap1, *ap2; 549 550 PetscFunctionBegin; 551 PetscCall(MatSeqAIJGetArray(A, &aa)); 552 PetscCall(MatSeqAIJGetArray(B, &ba)); 553 for (i = 0; i < m; i++) { 554 if (im[i] < 0) continue; 555 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: 
row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 lastcol1 = -1; 559 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 560 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 561 rmax1 = aimax[row]; 562 nrow1 = ailen[row]; 563 low1 = 0; 564 high1 = nrow1; 565 lastcol2 = -1; 566 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 567 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 568 rmax2 = bimax[row]; 569 nrow2 = bilen[row]; 570 low2 = 0; 571 high2 = nrow2; 572 573 for (j = 0; j < n; j++) { 574 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 576 if (in[j] >= cstart && in[j] < cend) { 577 col = in[j] - cstart; 578 nonew = a->nonew; 579 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 580 } else if (in[j] < 0) { 581 continue; 582 } else { 583 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 584 if (mat->was_assembled) { 585 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 586 #if defined(PETSC_USE_CTABLE) 587 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 593 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */ 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ *)B->data; 598 bimax = b->imax; 599 bi = b->i; 600 bilen = b->ilen; 601 bj = b->j; 602 ba = b->a; 603 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 604 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 605 rmax2 = bimax[row]; 606 nrow2 = bilen[row]; 607 low2 = 0; 608 high2 = nrow2; 609 bm = aij->B->rmap->n; 610 ba = b->a; 611 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 612 PetscCheck(1 == ((Mat_SeqAIJ *)aij->B->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 613 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 618 } 619 } 620 } else { 621 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 626 } else { 627 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 628 } 629 } 630 } 631 } 632 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. 
But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be PETSC_FALSE, otherwise the statement aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
  would not be valid and the more general MatSetValues_MPIAIJ() would have to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
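     As a worked illustration (hypothetical sizes): with cstart = 4 and cend = 8, a row whose sorted global
     columns are {1, 4, 6, 9} contributes local columns {0, 2} to the diagonal arrays (aj/aa) and global
     columns {1, 9} to the off-diagonal arrays (bj/ba), so ailen and bilen each receive 2 for that row.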
*/ 697 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 698 PetscScalar *aa = a->a, *ba = b->a; 699 700 PetscFunctionBegin; 701 /* Iterate over all rows of the matrix */ 702 for (j = 0; j < am; j++) { 703 dnz_row = onz_row = 0; 704 rowstart_offd = full_offd_i[j]; 705 rowstart_diag = full_diag_i[j]; 706 /* Iterate over all non-zero columns of the current row */ 707 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 708 /* If column is in the diagonal */ 709 if (mat_j[col] >= cstart && mat_j[col] < cend) { 710 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 711 aa[rowstart_diag + dnz_row] = mat_a[col]; 712 dnz_row++; 713 } else { /* off-diagonal entries */ 714 bj[rowstart_offd + onz_row] = mat_j[col]; 715 ba[rowstart_offd + onz_row] = mat_a[col]; 716 onz_row++; 717 } 718 } 719 ailen[j] = dnz_row; 720 bilen[j] = onz_row; 721 } 722 PetscFunctionReturn(PETSC_SUCCESS); 723 } 724 725 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 726 { 727 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 728 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 729 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 730 731 PetscFunctionBegin; 732 for (i = 0; i < m; i++) { 733 if (idxm[i] < 0) continue; /* negative row */ 734 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 735 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 736 row = idxm[i] - rstart; 737 for (j = 0; j < n; j++) { 738 if (idxn[j] < 0) continue; /* negative column */ 739 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 740 if (idxn[j] >= cstart && idxn[j] < cend) { 741 col = idxn[j] - cstart; 742 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 743 } else { 744 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 745 #if defined(PETSC_USE_CTABLE) 746 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 747 col--; 748 #else 749 col = aij->colmap[idxn[j]] - 1; 750 #endif 751 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 752 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 753 } 754 } 755 } 756 PetscFunctionReturn(PETSC_SUCCESS); 757 } 758 759 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 760 { 761 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 762 PetscInt nstash, reallocs; 763 764 PetscFunctionBegin; 765 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 766 767 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 768 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 769 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 770 PetscFunctionReturn(PETSC_SUCCESS); 771 } 772 773 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 774 { 775 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 776 PetscMPIInt n; 777 PetscInt i, j, rstart, ncols, flg; 778 PetscInt *row, *col; 779 PetscBool all_assembled; 780 PetscScalar *val; 781 782 /* do not use 'b = 
(Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine whether any process has disassembled; if so, we must also disassemble ourselves so that we can reassemble */
  /*
     if the nonzero structure of the submatrix B cannot change, then we know that
     no process disassembled and we can skip this step
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in the matrix, then only set the matrix nonzero state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

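  /* Overview of what follows (the code below is authoritative): the global indices in rows[] are mapped to
     locally owned rows; if x and b are given (and the row/column layouts are congruent) b is adjusted so that
     b[i] = diag*x[i] for each zeroed row; the corresponding rows of the diagonal and off-diagonal blocks are
     zeroed; and a nonzero diag is placed on the diagonal. */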
869 PetscFunctionBegin; 870 /* get locally owned rows */ 871 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 872 PetscCall(MatHasCongruentLayouts(A, &cong)); 873 /* fix right-hand side if needed */ 874 if (x && b) { 875 const PetscScalar *xx; 876 PetscScalar *bb; 877 878 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 879 PetscCall(VecGetArrayRead(x, &xx)); 880 PetscCall(VecGetArray(b, &bb)); 881 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 882 PetscCall(VecRestoreArrayRead(x, &xx)); 883 PetscCall(VecRestoreArray(b, &bb)); 884 } 885 886 if (diag != 0.0 && cong) { 887 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 888 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 889 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 890 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 891 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 892 PetscInt nnwA, nnwB; 893 PetscBool nnzA, nnzB; 894 895 nnwA = aijA->nonew; 896 nnwB = aijB->nonew; 897 nnzA = aijA->keepnonzeropattern; 898 nnzB = aijB->keepnonzeropattern; 899 if (!nnzA) { 900 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 901 aijA->nonew = 0; 902 } 903 if (!nnzB) { 904 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 905 aijB->nonew = 0; 906 } 907 /* Must zero here before the next loop */ 908 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 909 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 910 for (r = 0; r < len; ++r) { 911 const PetscInt row = lrows[r] + A->rmap->rstart; 912 if (row >= A->cmap->N) continue; 913 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 914 } 915 aijA->nonew = nnwA; 916 aijB->nonew = nnwB; 917 } else { 918 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 919 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 920 } 921 PetscCall(PetscFree(lrows)); 922 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 923 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 924 925 /* only change matrix nonzero state if pattern was allowed to be changed */ 926 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 927 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 928 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 929 } 930 PetscFunctionReturn(PETSC_SUCCESS); 931 } 932 933 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 934 { 935 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 936 PetscInt n = A->rmap->n; 937 PetscInt i, j, r, m, len = 0; 938 PetscInt *lrows, *owners = A->rmap->range; 939 PetscMPIInt p = 0; 940 PetscSFNode *rrows; 941 PetscSF sf; 942 const PetscScalar *xx; 943 PetscScalar *bb, *mask, *aij_a; 944 Vec xmask, lmask; 945 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 946 const PetscInt *aj, *ii, *ridx; 947 PetscScalar *aa; 948 949 PetscFunctionBegin; 950 /* Create SF where leaves are input rows and roots are owned rows */ 951 PetscCall(PetscMalloc1(n, &lrows)); 952 for (r = 0; r < n; ++r) lrows[r] = -1; 953 PetscCall(PetscMalloc1(N, &rrows)); 954 for (r = 0; r < N; ++r) { 955 const PetscInt idx = 
rows[r]; 956 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 957 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 958 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 959 } 960 rrows[r].rank = p; 961 rrows[r].index = rows[r] - owners[p]; 962 } 963 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 964 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 965 /* Collect flags for rows to be zeroed */ 966 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 967 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 968 PetscCall(PetscSFDestroy(&sf)); 969 /* Compress and put in row numbers */ 970 for (r = 0; r < n; ++r) 971 if (lrows[r] >= 0) lrows[len++] = r; 972 /* zero diagonal part of matrix */ 973 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 974 /* handle off-diagonal part of matrix */ 975 PetscCall(MatCreateVecs(A, &xmask, NULL)); 976 PetscCall(VecDuplicate(l->lvec, &lmask)); 977 PetscCall(VecGetArray(xmask, &bb)); 978 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 979 PetscCall(VecRestoreArray(xmask, &bb)); 980 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 981 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 982 PetscCall(VecDestroy(&xmask)); 983 if (x && b) { /* this code is buggy when the row and column layout don't match */ 984 PetscBool cong; 985 986 PetscCall(MatHasCongruentLayouts(A, &cong)); 987 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 988 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 989 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 990 PetscCall(VecGetArrayRead(l->lvec, &xx)); 991 PetscCall(VecGetArray(b, &bb)); 992 } 993 PetscCall(VecGetArray(lmask, &mask)); 994 /* remove zeroed rows of off-diagonal matrix */ 995 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 996 ii = aij->i; 997 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 998 /* loop over all elements of off process part of matrix zeroing removed columns*/ 999 if (aij->compressedrow.use) { 1000 m = aij->compressedrow.nrows; 1001 ii = aij->compressedrow.i; 1002 ridx = aij->compressedrow.rindex; 1003 for (i = 0; i < m; i++) { 1004 n = ii[i + 1] - ii[i]; 1005 aj = aij->j + ii[i]; 1006 aa = aij_a + ii[i]; 1007 1008 for (j = 0; j < n; j++) { 1009 if (PetscAbsScalar(mask[*aj])) { 1010 if (b) bb[*ridx] -= *aa * xx[*aj]; 1011 *aa = 0.0; 1012 } 1013 aa++; 1014 aj++; 1015 } 1016 ridx++; 1017 } 1018 } else { /* do not use compressed row format */ 1019 m = l->B->rmap->n; 1020 for (i = 0; i < m; i++) { 1021 n = ii[i + 1] - ii[i]; 1022 aj = aij->j + ii[i]; 1023 aa = aij_a + ii[i]; 1024 for (j = 0; j < n; j++) { 1025 if (PetscAbsScalar(mask[*aj])) { 1026 if (b) bb[i] -= *aa * xx[*aj]; 1027 *aa = 0.0; 1028 } 1029 aa++; 1030 aj++; 1031 } 1032 } 1033 } 1034 if (x && b) { 1035 PetscCall(VecRestoreArray(b, &bb)); 1036 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1037 } 1038 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1039 PetscCall(VecRestoreArray(lmask, &mask)); 1040 PetscCall(VecDestroy(&lmask)); 1041 PetscCall(PetscFree(lrows)); 1042 1043 /* only change matrix nonzero state if 
pattern was allowed to be changed */ 1044 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1045 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1046 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1047 } 1048 PetscFunctionReturn(PETSC_SUCCESS); 1049 } 1050 1051 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1052 { 1053 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1054 PetscInt nt; 1055 VecScatter Mvctx = a->Mvctx; 1056 1057 PetscFunctionBegin; 1058 PetscCall(VecGetLocalSize(xx, &nt)); 1059 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1060 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1061 PetscUseTypeMethod(a->A, mult, xx, yy); 1062 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1063 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1064 PetscFunctionReturn(PETSC_SUCCESS); 1065 } 1066 1067 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1070 1071 PetscFunctionBegin; 1072 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1073 PetscFunctionReturn(PETSC_SUCCESS); 1074 } 1075 1076 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1077 { 1078 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1079 VecScatter Mvctx = a->Mvctx; 1080 1081 PetscFunctionBegin; 1082 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1083 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1084 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1085 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1090 { 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1092 1093 PetscFunctionBegin; 1094 /* do nondiagonal part */ 1095 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1096 /* do local part */ 1097 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1098 /* add partial results together */ 1099 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1100 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1101 PetscFunctionReturn(PETSC_SUCCESS); 1102 } 1103 1104 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1105 { 1106 MPI_Comm comm; 1107 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1108 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1109 IS Me, Notme; 1110 PetscInt M, N, first, last, *notme, i; 1111 PetscBool lf; 1112 PetscMPIInt size; 1113 1114 PetscFunctionBegin; 1115 /* Easy test: symmetric diagonal block */ 1116 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1117 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1118 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1119 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1120 PetscCallMPI(MPI_Comm_size(comm, &size)); 1121 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1122 1123 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
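     Each process extracts the sequential submatrix of Amat formed by its rows and the columns it does not own,
     and the submatrix of Bmat formed by the rows it does not own and its columns, then checks that these two
     sequential matrices are transposes of each other.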
*/ 1124 PetscCall(MatGetSize(Amat, &M, &N)); 1125 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1126 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1127 for (i = 0; i < first; i++) notme[i] = i; 1128 for (i = last; i < M; i++) notme[i - last + first] = i; 1129 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1130 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1131 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1132 Aoff = Aoffs[0]; 1133 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1134 Boff = Boffs[0]; 1135 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1136 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1137 PetscCall(MatDestroyMatrices(1, &Boffs)); 1138 PetscCall(ISDestroy(&Me)); 1139 PetscCall(ISDestroy(&Notme)); 1140 PetscCall(PetscFree(notme)); 1141 PetscFunctionReturn(PETSC_SUCCESS); 1142 } 1143 1144 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1145 { 1146 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1147 1148 PetscFunctionBegin; 1149 /* do nondiagonal part */ 1150 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1151 /* do local part */ 1152 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1153 /* add partial results together */ 1154 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1155 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1156 PetscFunctionReturn(PETSC_SUCCESS); 1157 } 1158 1159 /* 1160 This only works correctly for square matrices where the subblock A->A is the 1161 diagonal block 1162 */ 1163 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1164 { 1165 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1166 1167 PetscFunctionBegin; 1168 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1169 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1170 PetscCall(MatGetDiagonal(a->A, v)); 1171 PetscFunctionReturn(PETSC_SUCCESS); 1172 } 1173 1174 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1175 { 1176 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1177 1178 PetscFunctionBegin; 1179 PetscCall(MatScale(a->A, aa)); 1180 PetscCall(MatScale(a->B, aa)); 1181 PetscFunctionReturn(PETSC_SUCCESS); 1182 } 1183 1184 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1185 { 1186 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1187 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1188 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1189 const PetscInt *garray = aij->garray; 1190 const PetscScalar *aa, *ba; 1191 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1192 PetscInt64 nz, hnz; 1193 PetscInt *rowlens; 1194 PetscInt *colidxs; 1195 PetscScalar *matvals; 1196 PetscMPIInt rank; 1197 1198 PetscFunctionBegin; 1199 PetscCall(PetscViewerSetUp(viewer)); 1200 1201 M = mat->rmap->N; 1202 N = mat->cmap->N; 1203 m = mat->rmap->n; 1204 rs = mat->rmap->rstart; 1205 cs = mat->cmap->rstart; 1206 nz = A->nz + B->nz; 1207 1208 /* write matrix header */ 1209 header[0] = MAT_FILE_CLASSID; 1210 header[1] = M; 1211 header[2] = N; 1212 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1213 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1214 if (rank == 0) 
PetscCall(PetscIntCast(hnz, &header[3])); 1215 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1216 1217 /* fill in and store row lengths */ 1218 PetscCall(PetscMalloc1(m, &rowlens)); 1219 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1220 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1221 PetscCall(PetscFree(rowlens)); 1222 1223 /* fill in and store column indices */ 1224 PetscCall(PetscMalloc1(nz, &colidxs)); 1225 for (cnt = 0, i = 0; i < m; i++) { 1226 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 colidxs[cnt++] = garray[B->j[jb]]; 1229 } 1230 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1231 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1232 } 1233 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1234 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1235 PetscCall(PetscFree(colidxs)); 1236 1237 /* fill in and store nonzero values */ 1238 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1239 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1240 PetscCall(PetscMalloc1(nz, &matvals)); 1241 for (cnt = 0, i = 0; i < m; i++) { 1242 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1243 if (garray[B->j[jb]] > cs) break; 1244 matvals[cnt++] = ba[jb]; 1245 } 1246 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1247 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1248 } 1249 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1250 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1251 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1252 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1253 PetscCall(PetscFree(matvals)); 1254 1255 /* write block size option to the viewer's .info file */ 1256 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1257 PetscFunctionReturn(PETSC_SUCCESS); 1258 } 1259 1260 #include <petscdraw.h> 1261 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1262 { 1263 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1264 PetscMPIInt rank = aij->rank, size = aij->size; 1265 PetscBool isdraw, isascii, isbinary; 1266 PetscViewer sviewer; 1267 PetscViewerFormat format; 1268 1269 PetscFunctionBegin; 1270 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1271 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1272 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1273 if (isascii) { 1274 PetscCall(PetscViewerGetFormat(viewer, &format)); 1275 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1276 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1277 PetscCall(PetscMalloc1(size, &nz)); 1278 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1279 for (i = 0; i < size; i++) { 1280 nmax = PetscMax(nmax, nz[i]); 1281 nmin = PetscMin(nmin, nz[i]); 1282 navg += nz[i]; 1283 } 1284 PetscCall(PetscFree(nz)); 1285 navg = navg / size; 1286 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" 
PetscInt_FMT "\n", nmin, navg, nmax)); 1287 PetscFunctionReturn(PETSC_SUCCESS); 1288 } 1289 PetscCall(PetscViewerGetFormat(viewer, &format)); 1290 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1291 MatInfo info; 1292 PetscInt *inodes = NULL; 1293 1294 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1295 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1296 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1297 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1298 if (!inodes) { 1299 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1300 info.memory)); 1301 } else { 1302 PetscCall( 1303 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1304 } 1305 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1306 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1307 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1308 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1309 PetscCall(PetscViewerFlush(viewer)); 1310 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1311 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1312 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1313 PetscFunctionReturn(PETSC_SUCCESS); 1314 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1315 PetscInt inodecount, inodelimit, *inodes; 1316 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1317 if (inodes) { 1318 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1319 } else { 1320 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1321 } 1322 PetscFunctionReturn(PETSC_SUCCESS); 1323 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1324 PetscFunctionReturn(PETSC_SUCCESS); 1325 } 1326 } else if (isbinary) { 1327 if (size == 1) { 1328 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1329 PetscCall(MatView(aij->A, viewer)); 1330 } else { 1331 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1332 } 1333 PetscFunctionReturn(PETSC_SUCCESS); 1334 } else if (isascii && size == 1) { 1335 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1336 PetscCall(MatView(aij->A, viewer)); 1337 PetscFunctionReturn(PETSC_SUCCESS); 1338 } else if (isdraw) { 1339 PetscDraw draw; 1340 PetscBool isnull; 1341 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1342 PetscCall(PetscDrawIsNull(draw, &isnull)); 1343 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1344 } 1345 1346 { /* assemble the entire matrix onto first processor */ 1347 Mat A = NULL, Av; 1348 IS isrow, iscol; 1349 1350 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1352 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1353 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1354 /* The commented code uses MatCreateSubMatrices instead */ 1355 /* 1356 Mat *AA, A = NULL, Av; 1357 IS isrow,iscol; 1358 1359 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1361 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1362 if (rank == 0) { 1363 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1364 A = AA[0]; 1365 Av = AA[0]; 1366 } 1367 PetscCall(MatDestroySubMatrices(1,&AA)); 1368 */ 1369 PetscCall(ISDestroy(&iscol)); 1370 PetscCall(ISDestroy(&isrow)); 1371 /* 1372 Everyone has to call to draw the matrix since the graphics waits are 1373 synchronized across all processors that share the PetscDraw object 1374 */ 1375 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1376 if (rank == 0) { 1377 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1378 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1379 } 1380 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1381 PetscCall(MatDestroy(&A)); 1382 } 1383 PetscFunctionReturn(PETSC_SUCCESS); 1384 } 1385 1386 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1387 { 1388 PetscBool isascii, isdraw, issocket, isbinary; 1389 1390 PetscFunctionBegin; 1391 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1395 if (isascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1396 PetscFunctionReturn(PETSC_SUCCESS); 1397 } 1398 1399 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1400 { 1401 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1402 Vec bb1 = NULL; 1403 PetscBool hasop; 1404 1405 PetscFunctionBegin; 1406 if (flag == SOR_APPLY_UPPER) { 1407 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1408 PetscFunctionReturn(PETSC_SUCCESS); 1409 } 1410 1411 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1412 1413 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1414 if (flag & SOR_ZERO_INITIAL_GUESS) { 1415 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1416 its--; 1417 } 1418 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 
1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 1439 /* update rhs: bb1 = bb - B*x */ 1440 PetscCall(VecScale(mat->lvec, -1.0)); 1441 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1442 1443 /* local sweep */ 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1445 } 1446 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1447 if (flag & SOR_ZERO_INITIAL_GUESS) { 1448 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1449 its--; 1450 } 1451 while (its--) { 1452 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 1455 /* update rhs: bb1 = bb - B*x */ 1456 PetscCall(VecScale(mat->lvec, -1.0)); 1457 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1458 1459 /* local sweep */ 1460 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1461 } 1462 } else if (flag & SOR_EISENSTAT) { 1463 Vec xx1; 1464 1465 PetscCall(VecDuplicate(bb, &xx1)); 1466 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1467 1468 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1469 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1470 if (!mat->diag) { 1471 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1472 PetscCall(MatGetDiagonal(matin, mat->diag)); 1473 } 1474 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1475 if (hasop) { 1476 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1477 } else { 1478 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1479 } 1480 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1481 1482 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1483 1484 /* local sweep */ 1485 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1486 PetscCall(VecAXPY(xx, 1.0, xx1)); 1487 PetscCall(VecDestroy(&xx1)); 1488 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1489 1490 PetscCall(VecDestroy(&bb1)); 1491 1492 matin->factorerrortype = mat->A->factorerrortype; 1493 PetscFunctionReturn(PETSC_SUCCESS); 1494 } 1495 1496 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1497 { 1498 Mat aA, aB, Aperm; 1499 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1500 PetscScalar *aa, *ba; 1501 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1502 PetscSF rowsf, sf; 1503 IS parcolp = NULL; 1504 PetscBool done; 1505 1506 PetscFunctionBegin; 1507 PetscCall(MatGetLocalSize(A, &m, &n)); 1508 PetscCall(ISGetIndices(rowp, &rwant)); 1509 PetscCall(ISGetIndices(colp, &cwant)); 1510 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1511 1512 /* Invert row permutation to find out where my rows should go */ 1513 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1514 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1515 PetscCall(PetscSFSetFromOptions(rowsf)); 1516 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1517 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1518 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1519 1520 /* Invert column permutation to find out where my columns should go */ 1521 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1522 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1523 PetscCall(PetscSFSetFromOptions(sf)); 1524 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1525 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1526 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1527 PetscCall(PetscSFDestroy(&sf)); 1528 1529 PetscCall(ISRestoreIndices(rowp, &rwant)); 1530 PetscCall(ISRestoreIndices(colp, &cwant)); 1531 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1532 1533 /* Find out where my gcols should go */ 1534 PetscCall(MatGetSize(aB, NULL, &ng)); 1535 PetscCall(PetscMalloc1(ng, &gcdest)); 1536 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1537 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1538 PetscCall(PetscSFSetFromOptions(sf)); 1539 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1540 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1541 PetscCall(PetscSFDestroy(&sf)); 1542 1543 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1544 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1545 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1546 for (i = 0; i < m; i++) { 1547 PetscInt row = rdest[i]; 1548 PetscMPIInt rowner; 1549 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1550 for (j = ai[i]; j < ai[i + 1]; j++) { 1551 PetscInt col = cdest[aj[j]]; 1552 PetscMPIInt cowner; 1553 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1554 if (rowner == cowner) dnnz[i]++; 1555 else onnz[i]++; 1556 } 1557 for (j = bi[i]; j < bi[i + 1]; j++) { 1558 PetscInt col = gcdest[bj[j]]; 1559 PetscMPIInt cowner; 1560 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1561 if (rowner == cowner) dnnz[i]++; 1562 else onnz[i]++; 1563 } 1564 } 1565 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1566 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1568 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1569 PetscCall(PetscSFDestroy(&rowsf)); 1570 1571 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1572 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1573 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1574 for (i = 0; i < m; i++) { 1575 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1576 PetscInt j0, rowlen; 1577 rowlen = ai[i + 1] - ai[i]; 1578 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1579 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1580 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1581 } 1582 rowlen = bi[i + 1] - bi[i]; 1583 for (j0 = j = 0; j < rowlen; j0 = j) { 1584 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1585 
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1586 } 1587 } 1588 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1589 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1591 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1592 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1593 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1594 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1595 PetscCall(PetscFree3(work, rdest, cdest)); 1596 PetscCall(PetscFree(gcdest)); 1597 if (parcolp) PetscCall(ISDestroy(&colp)); 1598 *B = Aperm; 1599 PetscFunctionReturn(PETSC_SUCCESS); 1600 } 1601 1602 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1603 { 1604 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1605 1606 PetscFunctionBegin; 1607 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1608 if (ghosts) *ghosts = aij->garray; 1609 PetscFunctionReturn(PETSC_SUCCESS); 1610 } 1611 1612 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1613 { 1614 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1615 Mat A = mat->A, B = mat->B; 1616 PetscLogDouble isend[5], irecv[5]; 1617 1618 PetscFunctionBegin; 1619 info->block_size = 1.0; 1620 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1621 1622 isend[0] = info->nz_used; 1623 isend[1] = info->nz_allocated; 1624 isend[2] = info->nz_unneeded; 1625 isend[3] = info->memory; 1626 isend[4] = info->mallocs; 1627 1628 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1629 1630 isend[0] += info->nz_used; 1631 isend[1] += info->nz_allocated; 1632 isend[2] += info->nz_unneeded; 1633 isend[3] += info->memory; 1634 isend[4] += info->mallocs; 1635 if (flag == MAT_LOCAL) { 1636 info->nz_used = isend[0]; 1637 info->nz_allocated = isend[1]; 1638 info->nz_unneeded = isend[2]; 1639 info->memory = isend[3]; 1640 info->mallocs = isend[4]; 1641 } else if (flag == MAT_GLOBAL_MAX) { 1642 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1643 1644 info->nz_used = irecv[0]; 1645 info->nz_allocated = irecv[1]; 1646 info->nz_unneeded = irecv[2]; 1647 info->memory = irecv[3]; 1648 info->mallocs = irecv[4]; 1649 } else if (flag == MAT_GLOBAL_SUM) { 1650 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1651 1652 info->nz_used = irecv[0]; 1653 info->nz_allocated = irecv[1]; 1654 info->nz_unneeded = irecv[2]; 1655 info->memory = irecv[3]; 1656 info->mallocs = irecv[4]; 1657 } 1658 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1659 info->fill_ratio_needed = 0; 1660 info->factor_mallocs = 0; 1661 PetscFunctionReturn(PETSC_SUCCESS); 1662 } 1663 1664 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1665 { 1666 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1667 1668 PetscFunctionBegin; 1669 switch (op) { 1670 case MAT_NEW_NONZERO_LOCATIONS: 1671 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1672 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1673 case MAT_KEEP_NONZERO_PATTERN: 1674 case MAT_NEW_NONZERO_LOCATION_ERR: 1675 case MAT_USE_INODES: 1676 case MAT_IGNORE_ZERO_ENTRIES: 1677 case MAT_FORM_EXPLICIT_TRANSPOSE: 1678 MatCheckPreallocated(A, 1); 1679 PetscCall(MatSetOption(a->A, op, flg)); 1680 PetscCall(MatSetOption(a->B, op, flg)); 1681 break; 1682 case MAT_ROW_ORIENTED: 1683 MatCheckPreallocated(A, 1); 1684 a->roworiented = flg; 1685 1686 
PetscCall(MatSetOption(a->A, op, flg)); 1687 PetscCall(MatSetOption(a->B, op, flg)); 1688 break; 1689 case MAT_IGNORE_OFF_PROC_ENTRIES: 1690 a->donotstash = flg; 1691 break; 1692 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1693 case MAT_SPD: 1694 case MAT_SYMMETRIC: 1695 case MAT_STRUCTURALLY_SYMMETRIC: 1696 case MAT_HERMITIAN: 1697 case MAT_SYMMETRY_ETERNAL: 1698 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1699 case MAT_SPD_ETERNAL: 1700 /* if the diagonal matrix is square it inherits some of the properties above */ 1701 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1702 break; 1703 case MAT_SUBMAT_SINGLEIS: 1704 A->submat_singleis = flg; 1705 break; 1706 default: 1707 break; 1708 } 1709 PetscFunctionReturn(PETSC_SUCCESS); 1710 } 1711 1712 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1713 { 1714 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1715 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1716 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1717 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1718 PetscInt *cmap, *idx_p; 1719 1720 PetscFunctionBegin; 1721 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1722 mat->getrowactive = PETSC_TRUE; 1723 1724 if (!mat->rowvalues && (idx || v)) { 1725 /* 1726 allocate enough space to hold information from the longest row. 1727 */ 1728 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1729 PetscInt max = 1, tmp; 1730 for (i = 0; i < matin->rmap->n; i++) { 1731 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1732 if (max < tmp) max = tmp; 1733 } 1734 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1735 } 1736 1737 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1738 lrow = row - rstart; 1739 1740 pvA = &vworkA; 1741 pcA = &cworkA; 1742 pvB = &vworkB; 1743 pcB = &cworkB; 1744 if (!v) { 1745 pvA = NULL; 1746 pvB = NULL; 1747 } 1748 if (!idx) { 1749 pcA = NULL; 1750 if (!v) pcB = NULL; 1751 } 1752 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1753 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1754 nztot = nzA + nzB; 1755 1756 cmap = mat->garray; 1757 if (v || idx) { 1758 if (nztot) { 1759 /* Sort by increasing column numbers, assuming A and B already sorted */ 1760 PetscInt imark = -1; 1761 if (v) { 1762 *v = v_p = mat->rowvalues; 1763 for (i = 0; i < nzB; i++) { 1764 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1765 else break; 1766 } 1767 imark = i; 1768 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1769 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1770 } 1771 if (idx) { 1772 *idx = idx_p = mat->rowindices; 1773 if (imark > -1) { 1774 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1775 } else { 1776 for (i = 0; i < nzB; i++) { 1777 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1778 else break; 1779 } 1780 imark = i; 1781 } 1782 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1783 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1784 } 1785 } else { 1786 if (idx) *idx = NULL; 1787 if (v) *v = NULL; 1788 } 1789 } 1790 *nz = nztot; 1791 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1792 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1793 
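  /* Note: for a nonempty row, *idx and *v (when requested) point into mat->rowindices and
     mat->rowvalues, which are reused by the next MatGetRow() call; MatRestoreRow_MPIAIJ()
     below only clears the getrowactive flag. A minimal caller-side sketch (illustrative
     only) using the public interface:

        PetscInt           ncols;
        const PetscInt    *cols;
        const PetscScalar *vals;

        PetscCall(MatGetRow(mat, row, &ncols, &cols, &vals));
        // cols[] holds global column indices in increasing order, vals[] the matching values
        PetscCall(MatRestoreRow(mat, row, &ncols, &cols, &vals));
  */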
PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 1800 PetscFunctionBegin; 1801 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1802 aij->getrowactive = PETSC_FALSE; 1803 PetscFunctionReturn(PETSC_SUCCESS); 1804 } 1805 1806 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1807 { 1808 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1809 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1810 PetscInt i, j; 1811 PetscReal sum = 0.0; 1812 const MatScalar *v, *amata, *bmata; 1813 1814 PetscFunctionBegin; 1815 if (aij->size == 1) { 1816 PetscCall(MatNorm(aij->A, type, norm)); 1817 } else { 1818 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1819 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1820 if (type == NORM_FROBENIUS) { 1821 v = amata; 1822 for (i = 0; i < amat->nz; i++) { 1823 sum += PetscRealPart(PetscConj(*v) * (*v)); 1824 v++; 1825 } 1826 v = bmata; 1827 for (i = 0; i < bmat->nz; i++) { 1828 sum += PetscRealPart(PetscConj(*v) * (*v)); 1829 v++; 1830 } 1831 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1832 *norm = PetscSqrtReal(*norm); 1833 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1834 } else if (type == NORM_1) { /* max column norm */ 1835 Vec col; 1836 PetscScalar *array; 1837 PetscInt *jj, *garray = aij->garray; 1838 1839 PetscCall(MatCreateVecs(mat, &col, NULL)); 1840 PetscCall(VecSet(col, 0.0)); 1841 PetscCall(VecGetArrayWrite(col, &array)); 1842 v = amata; 1843 jj = amat->j; 1844 for (j = 0; j < amat->nz; j++) array[*jj++] += PetscAbsScalar(*v++); 1845 PetscCall(VecRestoreArrayWrite(col, &array)); 1846 v = bmata; 1847 jj = bmat->j; 1848 for (j = 0; j < bmat->nz; j++) PetscCall(VecSetValue(col, garray[*jj++], PetscAbsScalar(*v++), ADD_VALUES)); 1849 PetscCall(VecAssemblyBegin(col)); 1850 PetscCall(VecAssemblyEnd(col)); 1851 PetscCall(VecNorm(col, NORM_INFINITY, norm)); 1852 PetscCall(VecDestroy(&col)); 1853 } else if (type == NORM_INFINITY) { /* max row norm */ 1854 PetscReal ntemp = 0.0; 1855 for (j = 0; j < aij->A->rmap->n; j++) { 1856 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1857 sum = 0.0; 1858 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1859 sum += PetscAbsScalar(*v); 1860 v++; 1861 } 1862 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1863 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1864 sum += PetscAbsScalar(*v); 1865 v++; 1866 } 1867 if (sum > ntemp) ntemp = sum; 1868 } 1869 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1870 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1871 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1872 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1873 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1874 } 1875 PetscFunctionReturn(PETSC_SUCCESS); 1876 } 1877 1878 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1879 { 1880 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1881 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1882 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1883 const PetscInt 
*ai, *aj, *bi, *bj, *B_diag_i; 1884 Mat B, A_diag, *B_diag; 1885 const MatScalar *pbv, *bv; 1886 1887 PetscFunctionBegin; 1888 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1889 ma = A->rmap->n; 1890 na = A->cmap->n; 1891 mb = a->B->rmap->n; 1892 nb = a->B->cmap->n; 1893 ai = Aloc->i; 1894 aj = Aloc->j; 1895 bi = Bloc->i; 1896 bj = Bloc->j; 1897 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1898 PetscInt *d_nnz, *g_nnz, *o_nnz; 1899 PetscSFNode *oloc; 1900 PETSC_UNUSED PetscSF sf; 1901 1902 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1903 /* compute d_nnz for preallocation */ 1904 PetscCall(PetscArrayzero(d_nnz, na)); 1905 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1906 /* compute local off-diagonal contributions */ 1907 PetscCall(PetscArrayzero(g_nnz, nb)); 1908 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1909 /* map those to global */ 1910 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1911 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1912 PetscCall(PetscSFSetFromOptions(sf)); 1913 PetscCall(PetscArrayzero(o_nnz, na)); 1914 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1915 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1916 PetscCall(PetscSFDestroy(&sf)); 1917 1918 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1919 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1920 PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs)); 1921 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1922 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1923 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1924 } else { 1925 B = *matout; 1926 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1927 } 1928 1929 b = (Mat_MPIAIJ *)B->data; 1930 A_diag = a->A; 1931 B_diag = &b->A; 1932 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1933 A_diag_ncol = A_diag->cmap->N; 1934 B_diag_ilen = sub_B_diag->ilen; 1935 B_diag_i = sub_B_diag->i; 1936 1937 /* Set ilen for diagonal of B */ 1938 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1939 1940 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1941 very quickly (=without using MatSetValues), because all writes are local. 
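     The off-diagonal block is handled differently below: its transposed entries land in rows
     owned by other processes, so they are inserted with MatSetValues() using global indices
     (columns taken from a->garray) and moved to their owners during MatAssemblyBegin()/MatAssemblyEnd().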
*/ 1942 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1943 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1944 1945 /* copy over the B part */ 1946 PetscCall(PetscMalloc1(bi[mb], &cols)); 1947 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1948 pbv = bv; 1949 row = A->rmap->rstart; 1950 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1951 cols_tmp = cols; 1952 for (i = 0; i < mb; i++) { 1953 ncol = bi[i + 1] - bi[i]; 1954 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1955 row++; 1956 if (pbv) pbv += ncol; 1957 if (cols_tmp) cols_tmp += ncol; 1958 } 1959 PetscCall(PetscFree(cols)); 1960 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1961 1962 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1963 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1964 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1965 *matout = B; 1966 } else { 1967 PetscCall(MatHeaderMerge(A, &B)); 1968 } 1969 PetscFunctionReturn(PETSC_SUCCESS); 1970 } 1971 1972 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1973 { 1974 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1975 Mat a = aij->A, b = aij->B; 1976 PetscInt s1, s2, s3; 1977 1978 PetscFunctionBegin; 1979 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1980 if (rr) { 1981 PetscCall(VecGetLocalSize(rr, &s1)); 1982 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1983 /* Overlap communication with computation. */ 1984 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1985 } 1986 if (ll) { 1987 PetscCall(VecGetLocalSize(ll, &s1)); 1988 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1989 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1990 } 1991 /* scale the diagonal block */ 1992 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1993 1994 if (rr) { 1995 /* Do a scatter end and then right scale the off-diagonal block */ 1996 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1997 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1998 } 1999 PetscFunctionReturn(PETSC_SUCCESS); 2000 } 2001 2002 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2003 { 2004 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2005 2006 PetscFunctionBegin; 2007 PetscCall(MatSetUnfactored(a->A)); 2008 PetscFunctionReturn(PETSC_SUCCESS); 2009 } 2010 2011 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2012 { 2013 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2014 Mat a, b, c, d; 2015 PetscBool flg; 2016 2017 PetscFunctionBegin; 2018 a = matA->A; 2019 b = matA->B; 2020 c = matB->A; 2021 d = matB->B; 2022 2023 PetscCall(MatEqual(a, c, &flg)); 2024 if (flg) PetscCall(MatEqual(b, d, &flg)); 2025 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2026 PetscFunctionReturn(PETSC_SUCCESS); 2027 } 2028 2029 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2030 { 2031 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2032 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2033 2034 PetscFunctionBegin; 2035 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
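     In that case the generic MatCopy_Basic() below is used instead; it copies the matrix entry by
     entry through the standard get/set-values interface, so it does not rely on the two off-process
     blocks having matching column compression.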
*/ 2036 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2037 /* because of the column compression in the off-processor part of the matrix a->B, 2038 the number of columns in a->B and b->B may be different, hence we cannot call 2039 the MatCopy() directly on the two parts. If need be, we can provide a more 2040 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2041 then copying the submatrices */ 2042 PetscCall(MatCopy_Basic(A, B, str)); 2043 } else { 2044 PetscCall(MatCopy(a->A, b->A, str)); 2045 PetscCall(MatCopy(a->B, b->B, str)); 2046 } 2047 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2048 PetscFunctionReturn(PETSC_SUCCESS); 2049 } 2050 2051 /* 2052 Computes the number of nonzeros per row needed for preallocation when X and Y 2053 have different nonzero structure. 2054 */ 2055 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2056 { 2057 PetscInt i, j, k, nzx, nzy; 2058 2059 PetscFunctionBegin; 2060 /* Set the number of nonzeros in the new matrix */ 2061 for (i = 0; i < m; i++) { 2062 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2063 nzx = xi[i + 1] - xi[i]; 2064 nzy = yi[i + 1] - yi[i]; 2065 nnz[i] = 0; 2066 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2067 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2068 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2069 nnz[i]++; 2070 } 2071 for (; k < nzy; k++) nnz[i]++; 2072 } 2073 PetscFunctionReturn(PETSC_SUCCESS); 2074 } 2075 2076 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2077 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2078 { 2079 PetscInt m = Y->rmap->N; 2080 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2081 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2082 2083 PetscFunctionBegin; 2084 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2085 PetscFunctionReturn(PETSC_SUCCESS); 2086 } 2087 2088 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2089 { 2090 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2091 2092 PetscFunctionBegin; 2093 if (str == SAME_NONZERO_PATTERN) { 2094 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2095 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2096 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2097 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2098 } else { 2099 Mat B; 2100 PetscInt *nnz_d, *nnz_o; 2101 2102 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2103 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2104 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2105 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2106 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2107 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2108 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2109 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2110 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2111 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2112 PetscCall(MatHeaderMerge(Y, &B)); 2113 PetscCall(PetscFree(nnz_d)); 
2114 PetscCall(PetscFree(nnz_o)); 2115 } 2116 PetscFunctionReturn(PETSC_SUCCESS); 2117 } 2118 2119 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2120 2121 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2122 { 2123 PetscFunctionBegin; 2124 if (PetscDefined(USE_COMPLEX)) { 2125 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2126 2127 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2128 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2129 } 2130 PetscFunctionReturn(PETSC_SUCCESS); 2131 } 2132 2133 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2134 { 2135 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2136 2137 PetscFunctionBegin; 2138 PetscCall(MatRealPart(a->A)); 2139 PetscCall(MatRealPart(a->B)); 2140 PetscFunctionReturn(PETSC_SUCCESS); 2141 } 2142 2143 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2144 { 2145 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2146 2147 PetscFunctionBegin; 2148 PetscCall(MatImaginaryPart(a->A)); 2149 PetscCall(MatImaginaryPart(a->B)); 2150 PetscFunctionReturn(PETSC_SUCCESS); 2151 } 2152 2153 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2154 { 2155 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2156 PetscInt i, *idxb = NULL, m = A->rmap->n; 2157 PetscScalar *vv; 2158 Vec vB, vA; 2159 const PetscScalar *va, *vb; 2160 2161 PetscFunctionBegin; 2162 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2163 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2164 2165 PetscCall(VecGetArrayRead(vA, &va)); 2166 if (idx) { 2167 for (i = 0; i < m; i++) { 2168 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2169 } 2170 } 2171 2172 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2173 PetscCall(PetscMalloc1(m, &idxb)); 2174 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2175 2176 PetscCall(VecGetArrayWrite(v, &vv)); 2177 PetscCall(VecGetArrayRead(vB, &vb)); 2178 for (i = 0; i < m; i++) { 2179 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2180 vv[i] = vb[i]; 2181 if (idx) idx[i] = a->garray[idxb[i]]; 2182 } else { 2183 vv[i] = va[i]; 2184 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2185 } 2186 } 2187 PetscCall(VecRestoreArrayWrite(v, &vv)); 2188 PetscCall(VecRestoreArrayRead(vA, &va)); 2189 PetscCall(VecRestoreArrayRead(vB, &vb)); 2190 PetscCall(PetscFree(idxb)); 2191 PetscCall(VecDestroy(&vA)); 2192 PetscCall(VecDestroy(&vB)); 2193 PetscFunctionReturn(PETSC_SUCCESS); 2194 } 2195 2196 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2197 { 2198 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2199 Vec vB, vA; 2200 2201 PetscFunctionBegin; 2202 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2203 PetscCall(MatGetRowSumAbs(a->A, vA)); 2204 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2205 PetscCall(MatGetRowSumAbs(a->B, vB)); 2206 PetscCall(VecAXPY(vA, 1.0, vB)); 2207 PetscCall(VecDestroy(&vB)); 2208 PetscCall(VecCopy(vA, v)); 2209 PetscCall(VecDestroy(&vA)); 2210 PetscFunctionReturn(PETSC_SUCCESS); 2211 } 2212 2213 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2214 { 2215 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2216 PetscInt m = A->rmap->n, n = A->cmap->n; 2217 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2218 PetscInt *cmap = mat->garray; 2219 PetscInt *diagIdx, *offdiagIdx; 2220 Vec diagV, offdiagV; 2221 PetscScalar *a, *diagA, *offdiagA; 2222 const PetscScalar *ba, *bav; 2223 PetscInt r, j, col, ncols, *bi, *bj; 2224 Mat B = mat->B; 2225 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2226 2227 PetscFunctionBegin; 2228 /* When a process holds entire A and other 
processes have no entry */ 2229 if (A->cmap->N == n) { 2230 PetscCall(VecGetArrayWrite(v, &diagA)); 2231 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2232 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2233 PetscCall(VecDestroy(&diagV)); 2234 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2235 PetscFunctionReturn(PETSC_SUCCESS); 2236 } else if (n == 0) { 2237 if (m) { 2238 PetscCall(VecGetArrayWrite(v, &a)); 2239 for (r = 0; r < m; r++) { 2240 a[r] = 0.0; 2241 if (idx) idx[r] = -1; 2242 } 2243 PetscCall(VecRestoreArrayWrite(v, &a)); 2244 } 2245 PetscFunctionReturn(PETSC_SUCCESS); 2246 } 2247 2248 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2249 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2250 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2251 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2252 2253 /* Get offdiagIdx[] for implicit 0.0 */ 2254 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2255 ba = bav; 2256 bi = b->i; 2257 bj = b->j; 2258 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2259 for (r = 0; r < m; r++) { 2260 ncols = bi[r + 1] - bi[r]; 2261 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2262 offdiagA[r] = *ba; 2263 offdiagIdx[r] = cmap[0]; 2264 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2265 offdiagA[r] = 0.0; 2266 2267 /* Find first hole in the cmap */ 2268 for (j = 0; j < ncols; j++) { 2269 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2270 if (col > j && j < cstart) { 2271 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2272 break; 2273 } else if (col > j + n && j >= cstart) { 2274 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2275 break; 2276 } 2277 } 2278 if (j == ncols && ncols < A->cmap->N - n) { 2279 /* a hole is outside compressed Bcols */ 2280 if (ncols == 0) { 2281 if (cstart) { 2282 offdiagIdx[r] = 0; 2283 } else offdiagIdx[r] = cend; 2284 } else { /* ncols > 0 */ 2285 offdiagIdx[r] = cmap[ncols - 1] + 1; 2286 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2287 } 2288 } 2289 } 2290 2291 for (j = 0; j < ncols; j++) { 2292 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2293 offdiagA[r] = *ba; 2294 offdiagIdx[r] = cmap[*bj]; 2295 } 2296 ba++; 2297 bj++; 2298 } 2299 } 2300 2301 PetscCall(VecGetArrayWrite(v, &a)); 2302 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2303 for (r = 0; r < m; ++r) { 2304 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2305 a[r] = diagA[r]; 2306 if (idx) idx[r] = cstart + diagIdx[r]; 2307 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2308 a[r] = diagA[r]; 2309 if (idx) { 2310 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2311 idx[r] = cstart + diagIdx[r]; 2312 } else idx[r] = offdiagIdx[r]; 2313 } 2314 } else { 2315 a[r] = offdiagA[r]; 2316 if (idx) idx[r] = offdiagIdx[r]; 2317 } 2318 } 2319 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2320 PetscCall(VecRestoreArrayWrite(v, &a)); 2321 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2322 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2323 PetscCall(VecDestroy(&diagV)); 2324 PetscCall(VecDestroy(&offdiagV)); 2325 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2326 PetscFunctionReturn(PETSC_SUCCESS); 2327 } 2328 2329 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2330 { 2331 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2332 PetscInt m = A->rmap->n, n = A->cmap->n; 2333 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 
2334 PetscInt *cmap = mat->garray; 2335 PetscInt *diagIdx, *offdiagIdx; 2336 Vec diagV, offdiagV; 2337 PetscScalar *a, *diagA, *offdiagA; 2338 const PetscScalar *ba, *bav; 2339 PetscInt r, j, col, ncols, *bi, *bj; 2340 Mat B = mat->B; 2341 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2342 2343 PetscFunctionBegin; 2344 /* When a process holds entire A and other processes have no entry */ 2345 if (A->cmap->N == n) { 2346 PetscCall(VecGetArrayWrite(v, &diagA)); 2347 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2348 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2349 PetscCall(VecDestroy(&diagV)); 2350 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2351 PetscFunctionReturn(PETSC_SUCCESS); 2352 } else if (n == 0) { 2353 if (m) { 2354 PetscCall(VecGetArrayWrite(v, &a)); 2355 for (r = 0; r < m; r++) { 2356 a[r] = PETSC_MAX_REAL; 2357 if (idx) idx[r] = -1; 2358 } 2359 PetscCall(VecRestoreArrayWrite(v, &a)); 2360 } 2361 PetscFunctionReturn(PETSC_SUCCESS); 2362 } 2363 2364 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2365 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2366 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2367 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2368 2369 /* Get offdiagIdx[] for implicit 0.0 */ 2370 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2371 ba = bav; 2372 bi = b->i; 2373 bj = b->j; 2374 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2375 for (r = 0; r < m; r++) { 2376 ncols = bi[r + 1] - bi[r]; 2377 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2378 offdiagA[r] = *ba; 2379 offdiagIdx[r] = cmap[0]; 2380 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2381 offdiagA[r] = 0.0; 2382 2383 /* Find first hole in the cmap */ 2384 for (j = 0; j < ncols; j++) { 2385 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2386 if (col > j && j < cstart) { 2387 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2388 break; 2389 } else if (col > j + n && j >= cstart) { 2390 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2391 break; 2392 } 2393 } 2394 if (j == ncols && ncols < A->cmap->N - n) { 2395 /* a hole is outside compressed Bcols */ 2396 if (ncols == 0) { 2397 if (cstart) { 2398 offdiagIdx[r] = 0; 2399 } else offdiagIdx[r] = cend; 2400 } else { /* ncols > 0 */ 2401 offdiagIdx[r] = cmap[ncols - 1] + 1; 2402 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2403 } 2404 } 2405 } 2406 2407 for (j = 0; j < ncols; j++) { 2408 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2409 offdiagA[r] = *ba; 2410 offdiagIdx[r] = cmap[*bj]; 2411 } 2412 ba++; 2413 bj++; 2414 } 2415 } 2416 2417 PetscCall(VecGetArrayWrite(v, &a)); 2418 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2419 for (r = 0; r < m; ++r) { 2420 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2421 a[r] = diagA[r]; 2422 if (idx) idx[r] = cstart + diagIdx[r]; 2423 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2424 a[r] = diagA[r]; 2425 if (idx) { 2426 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2427 idx[r] = cstart + diagIdx[r]; 2428 } else idx[r] = offdiagIdx[r]; 2429 } 2430 } else { 2431 a[r] = offdiagA[r]; 2432 if (idx) idx[r] = offdiagIdx[r]; 2433 } 2434 } 2435 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2436 PetscCall(VecRestoreArrayWrite(v, &a)); 2437 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2438 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2439 PetscCall(VecDestroy(&diagV)); 2440 
PetscCall(VecDestroy(&offdiagV)); 2441 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2442 PetscFunctionReturn(PETSC_SUCCESS); 2443 } 2444 2445 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2446 { 2447 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2448 PetscInt m = A->rmap->n, n = A->cmap->n; 2449 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2450 PetscInt *cmap = mat->garray; 2451 PetscInt *diagIdx, *offdiagIdx; 2452 Vec diagV, offdiagV; 2453 PetscScalar *a, *diagA, *offdiagA; 2454 const PetscScalar *ba, *bav; 2455 PetscInt r, j, col, ncols, *bi, *bj; 2456 Mat B = mat->B; 2457 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2458 2459 PetscFunctionBegin; 2460 /* When a process holds entire A and other processes have no entry */ 2461 if (A->cmap->N == n) { 2462 PetscCall(VecGetArrayWrite(v, &diagA)); 2463 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2464 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2465 PetscCall(VecDestroy(&diagV)); 2466 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2467 PetscFunctionReturn(PETSC_SUCCESS); 2468 } else if (n == 0) { 2469 if (m) { 2470 PetscCall(VecGetArrayWrite(v, &a)); 2471 for (r = 0; r < m; r++) { 2472 a[r] = PETSC_MIN_REAL; 2473 if (idx) idx[r] = -1; 2474 } 2475 PetscCall(VecRestoreArrayWrite(v, &a)); 2476 } 2477 PetscFunctionReturn(PETSC_SUCCESS); 2478 } 2479 2480 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2481 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2482 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2483 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2484 2485 /* Get offdiagIdx[] for implicit 0.0 */ 2486 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2487 ba = bav; 2488 bi = b->i; 2489 bj = b->j; 2490 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2491 for (r = 0; r < m; r++) { 2492 ncols = bi[r + 1] - bi[r]; 2493 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2494 offdiagA[r] = *ba; 2495 offdiagIdx[r] = cmap[0]; 2496 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2497 offdiagA[r] = 0.0; 2498 2499 /* Find first hole in the cmap */ 2500 for (j = 0; j < ncols; j++) { 2501 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2502 if (col > j && j < cstart) { 2503 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2504 break; 2505 } else if (col > j + n && j >= cstart) { 2506 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2507 break; 2508 } 2509 } 2510 if (j == ncols && ncols < A->cmap->N - n) { 2511 /* a hole is outside compressed Bcols */ 2512 if (ncols == 0) { 2513 if (cstart) { 2514 offdiagIdx[r] = 0; 2515 } else offdiagIdx[r] = cend; 2516 } else { /* ncols > 0 */ 2517 offdiagIdx[r] = cmap[ncols - 1] + 1; 2518 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2519 } 2520 } 2521 } 2522 2523 for (j = 0; j < ncols; j++) { 2524 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2525 offdiagA[r] = *ba; 2526 offdiagIdx[r] = cmap[*bj]; 2527 } 2528 ba++; 2529 bj++; 2530 } 2531 } 2532 2533 PetscCall(VecGetArrayWrite(v, &a)); 2534 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2535 for (r = 0; r < m; ++r) { 2536 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2537 a[r] = diagA[r]; 2538 if (idx) idx[r] = cstart + diagIdx[r]; 2539 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2540 a[r] = diagA[r]; 2541 if (idx) { 2542 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2543 idx[r] = cstart + diagIdx[r]; 2544 } else idx[r] = offdiagIdx[r]; 2545 } 2546 } 
else { 2547 a[r] = offdiagA[r]; 2548 if (idx) idx[r] = offdiagIdx[r]; 2549 } 2550 } 2551 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2552 PetscCall(VecRestoreArrayWrite(v, &a)); 2553 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2554 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2555 PetscCall(VecDestroy(&diagV)); 2556 PetscCall(VecDestroy(&offdiagV)); 2557 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2558 PetscFunctionReturn(PETSC_SUCCESS); 2559 } 2560 2561 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2562 { 2563 Mat *dummy; 2564 2565 PetscFunctionBegin; 2566 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2567 *newmat = *dummy; 2568 PetscCall(PetscFree(dummy)); 2569 PetscFunctionReturn(PETSC_SUCCESS); 2570 } 2571 2572 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2573 { 2574 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2575 2576 PetscFunctionBegin; 2577 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2578 A->factorerrortype = a->A->factorerrortype; 2579 PetscFunctionReturn(PETSC_SUCCESS); 2580 } 2581 2582 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2583 { 2584 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2585 2586 PetscFunctionBegin; 2587 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2588 PetscCall(MatSetRandom(aij->A, rctx)); 2589 if (x->assembled) { 2590 PetscCall(MatSetRandom(aij->B, rctx)); 2591 } else { 2592 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2593 } 2594 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2595 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2596 PetscFunctionReturn(PETSC_SUCCESS); 2597 } 2598 2599 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2600 { 2601 PetscFunctionBegin; 2602 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2603 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2604 PetscFunctionReturn(PETSC_SUCCESS); 2605 } 2606 2607 /*@ 2608 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2609 2610 Not Collective 2611 2612 Input Parameter: 2613 . A - the matrix 2614 2615 Output Parameter: 2616 . 
nz - the number of nonzeros 2617 2618 Level: advanced 2619 2620 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2621 @*/ 2622 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2623 { 2624 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2625 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2626 PetscBool isaij; 2627 2628 PetscFunctionBegin; 2629 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2630 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2631 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2632 PetscFunctionReturn(PETSC_SUCCESS); 2633 } 2634 2635 /*@ 2636 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2637 2638 Collective 2639 2640 Input Parameters: 2641 + A - the matrix 2642 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2643 2644 Level: advanced 2645 2646 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2647 @*/ 2648 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2649 { 2650 PetscFunctionBegin; 2651 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2652 PetscFunctionReturn(PETSC_SUCCESS); 2653 } 2654 2655 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject) 2656 { 2657 PetscBool sc = PETSC_FALSE, flg; 2658 2659 PetscFunctionBegin; 2660 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2661 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2662 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2663 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2664 PetscOptionsHeadEnd(); 2665 PetscFunctionReturn(PETSC_SUCCESS); 2666 } 2667 2668 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2669 { 2670 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2671 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2672 2673 PetscFunctionBegin; 2674 if (!Y->preallocated) { 2675 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2676 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
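      Reserving one slot per local row of maij->A is only that estimate; any diagonal entry whose
      column falls outside the local column range is stored in maij->B when MatShift_Basic() below
      inserts the values.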
*/ 2677 PetscInt nonew = aij->nonew; 2678 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2679 aij->nonew = nonew; 2680 } 2681 PetscCall(MatShift_Basic(Y, a)); 2682 PetscFunctionReturn(PETSC_SUCCESS); 2683 } 2684 2685 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2686 { 2687 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2688 2689 PetscFunctionBegin; 2690 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2691 PetscFunctionReturn(PETSC_SUCCESS); 2692 } 2693 2694 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2695 { 2696 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2697 2698 PetscFunctionBegin; 2699 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2700 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2701 PetscFunctionReturn(PETSC_SUCCESS); 2702 } 2703 2704 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2705 MatGetRow_MPIAIJ, 2706 MatRestoreRow_MPIAIJ, 2707 MatMult_MPIAIJ, 2708 /* 4*/ MatMultAdd_MPIAIJ, 2709 MatMultTranspose_MPIAIJ, 2710 MatMultTransposeAdd_MPIAIJ, 2711 NULL, 2712 NULL, 2713 NULL, 2714 /*10*/ NULL, 2715 NULL, 2716 NULL, 2717 MatSOR_MPIAIJ, 2718 MatTranspose_MPIAIJ, 2719 /*15*/ MatGetInfo_MPIAIJ, 2720 MatEqual_MPIAIJ, 2721 MatGetDiagonal_MPIAIJ, 2722 MatDiagonalScale_MPIAIJ, 2723 MatNorm_MPIAIJ, 2724 /*20*/ MatAssemblyBegin_MPIAIJ, 2725 MatAssemblyEnd_MPIAIJ, 2726 MatSetOption_MPIAIJ, 2727 MatZeroEntries_MPIAIJ, 2728 /*24*/ MatZeroRows_MPIAIJ, 2729 NULL, 2730 NULL, 2731 NULL, 2732 NULL, 2733 /*29*/ MatSetUp_MPI_Hash, 2734 NULL, 2735 NULL, 2736 MatGetDiagonalBlock_MPIAIJ, 2737 NULL, 2738 /*34*/ MatDuplicate_MPIAIJ, 2739 NULL, 2740 NULL, 2741 NULL, 2742 NULL, 2743 /*39*/ MatAXPY_MPIAIJ, 2744 MatCreateSubMatrices_MPIAIJ, 2745 MatIncreaseOverlap_MPIAIJ, 2746 MatGetValues_MPIAIJ, 2747 MatCopy_MPIAIJ, 2748 /*44*/ MatGetRowMax_MPIAIJ, 2749 MatScale_MPIAIJ, 2750 MatShift_MPIAIJ, 2751 MatDiagonalSet_MPIAIJ, 2752 MatZeroRowsColumns_MPIAIJ, 2753 /*49*/ MatSetRandom_MPIAIJ, 2754 MatGetRowIJ_MPIAIJ, 2755 MatRestoreRowIJ_MPIAIJ, 2756 NULL, 2757 NULL, 2758 /*54*/ MatFDColoringCreate_MPIXAIJ, 2759 NULL, 2760 MatSetUnfactored_MPIAIJ, 2761 MatPermute_MPIAIJ, 2762 NULL, 2763 /*59*/ MatCreateSubMatrix_MPIAIJ, 2764 MatDestroy_MPIAIJ, 2765 MatView_MPIAIJ, 2766 NULL, 2767 NULL, 2768 /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2769 NULL, 2770 NULL, 2771 NULL, 2772 MatGetRowMaxAbs_MPIAIJ, 2773 /*69*/ MatGetRowMinAbs_MPIAIJ, 2774 NULL, 2775 NULL, 2776 MatFDColoringApply_AIJ, 2777 MatSetFromOptions_MPIAIJ, 2778 MatFindZeroDiagonals_MPIAIJ, 2779 /*75*/ NULL, 2780 NULL, 2781 NULL, 2782 MatLoad_MPIAIJ, 2783 NULL, 2784 /*80*/ NULL, 2785 NULL, 2786 NULL, 2787 /*83*/ NULL, 2788 NULL, 2789 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2790 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2791 NULL, 2792 NULL, 2793 /*89*/ MatBindToCPU_MPIAIJ, 2794 MatProductSetFromOptions_MPIAIJ, 2795 NULL, 2796 NULL, 2797 MatConjugate_MPIAIJ, 2798 /*94*/ NULL, 2799 MatSetValuesRow_MPIAIJ, 2800 MatRealPart_MPIAIJ, 2801 MatImaginaryPart_MPIAIJ, 2802 NULL, 2803 /*99*/ NULL, 2804 NULL, 2805 NULL, 2806 MatGetRowMin_MPIAIJ, 2807 NULL, 2808 /*104*/ MatGetSeqNonzeroStructure_MPIAIJ, 2809 NULL, 2810 MatGetGhosts_MPIAIJ, 2811 NULL, 2812 NULL, 2813 /*109*/ MatMultDiagonalBlock_MPIAIJ, 2814 NULL, 2815 NULL, 2816 NULL, 2817 MatGetMultiProcBlock_MPIAIJ, 2818 /*114*/ MatFindNonzeroRows_MPIAIJ, 2819 MatGetColumnReductions_MPIAIJ, 2820 
MatInvertBlockDiagonal_MPIAIJ, 2821 MatInvertVariableBlockDiagonal_MPIAIJ, 2822 MatCreateSubMatricesMPI_MPIAIJ, 2823 /*119*/ NULL, 2824 NULL, 2825 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2826 NULL, 2827 NULL, 2828 /*124*/ NULL, 2829 NULL, 2830 MatSetBlockSizes_MPIAIJ, 2831 NULL, 2832 MatFDColoringSetUp_MPIXAIJ, 2833 /*129*/ MatFindOffBlockDiagonalEntries_MPIAIJ, 2834 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2835 NULL, 2836 NULL, 2837 NULL, 2838 /*134*/ MatCreateGraph_Simple_AIJ, 2839 NULL, 2840 MatEliminateZeros_MPIAIJ, 2841 MatGetRowSumAbs_MPIAIJ, 2842 NULL, 2843 /*139*/ NULL, 2844 NULL, 2845 MatCopyHashToXAIJ_MPI_Hash, 2846 MatGetCurrentMemType_MPIAIJ, 2847 NULL}; 2848 2849 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2850 { 2851 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2852 2853 PetscFunctionBegin; 2854 PetscCall(MatStoreValues(aij->A)); 2855 PetscCall(MatStoreValues(aij->B)); 2856 PetscFunctionReturn(PETSC_SUCCESS); 2857 } 2858 2859 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2860 { 2861 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2862 2863 PetscFunctionBegin; 2864 PetscCall(MatRetrieveValues(aij->A)); 2865 PetscCall(MatRetrieveValues(aij->B)); 2866 PetscFunctionReturn(PETSC_SUCCESS); 2867 } 2868 2869 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2870 { 2871 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2872 PetscMPIInt size; 2873 2874 PetscFunctionBegin; 2875 if (B->hash_active) { 2876 B->ops[0] = b->cops; 2877 B->hash_active = PETSC_FALSE; 2878 } 2879 PetscCall(PetscLayoutSetUp(B->rmap)); 2880 PetscCall(PetscLayoutSetUp(B->cmap)); 2881 2882 #if defined(PETSC_USE_CTABLE) 2883 PetscCall(PetscHMapIDestroy(&b->colmap)); 2884 #else 2885 PetscCall(PetscFree(b->colmap)); 2886 #endif 2887 PetscCall(PetscFree(b->garray)); 2888 PetscCall(VecDestroy(&b->lvec)); 2889 PetscCall(VecScatterDestroy(&b->Mvctx)); 2890 2891 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2892 2893 MatSeqXAIJGetOptions_Private(b->B); 2894 PetscCall(MatDestroy(&b->B)); 2895 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2896 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2897 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2898 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2899 MatSeqXAIJRestoreOptions_Private(b->B); 2900 2901 MatSeqXAIJGetOptions_Private(b->A); 2902 PetscCall(MatDestroy(&b->A)); 2903 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2904 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2905 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2906 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2907 MatSeqXAIJRestoreOptions_Private(b->A); 2908 2909 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2910 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2911 B->preallocated = PETSC_TRUE; 2912 B->was_assembled = PETSC_FALSE; 2913 B->assembled = PETSC_FALSE; 2914 PetscFunctionReturn(PETSC_SUCCESS); 2915 } 2916 2917 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2918 { 2919 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2920 PetscBool ondiagreset, offdiagreset, memoryreset; 2921 2922 PetscFunctionBegin; 2923 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2924 PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()"); 2925 if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS); 2926 2927 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset)); 2928 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset)); 2929 memoryreset = (PetscBool)(ondiagreset || offdiagreset); 2930 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B))); 2931 if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS); 2932 2933 PetscCall(PetscLayoutSetUp(B->rmap)); 2934 PetscCall(PetscLayoutSetUp(B->cmap)); 2935 PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled"); 2936 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2937 PetscCall(VecScatterDestroy(&b->Mvctx)); 2938 2939 B->preallocated = PETSC_TRUE; 2940 B->was_assembled = PETSC_FALSE; 2941 B->assembled = PETSC_FALSE; 2942 /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */ 2943 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2944 PetscFunctionReturn(PETSC_SUCCESS); 2945 } 2946 2947 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2948 { 2949 Mat mat; 2950 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2951 2952 PetscFunctionBegin; 2953 *newmat = NULL; 2954 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2955 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2956 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2957 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2958 a = (Mat_MPIAIJ *)mat->data; 2959 2960 mat->factortype = matin->factortype; 2961 mat->assembled = matin->assembled; 2962 mat->insertmode = NOT_SET_VALUES; 2963 2964 a->size = oldmat->size; 2965 a->rank = oldmat->rank; 2966 a->donotstash = oldmat->donotstash; 2967 a->roworiented = oldmat->roworiented; 2968 a->rowindices = NULL; 2969 a->rowvalues = NULL; 2970 a->getrowactive = PETSC_FALSE; 2971 2972 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2973 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2974 if (matin->hash_active) { 2975 PetscCall(MatSetUp(mat)); 2976 } else { 2977 
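    /* Non-hash case: copy the preallocated parallel structure of the old matrix -- the colmap and
       garray are copied, lvec is duplicated, the Mvctx scatter is shared by reference, and the
       sequential blocks A and B are duplicated below. */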
mat->preallocated = matin->preallocated; 2978 if (oldmat->colmap) { 2979 #if defined(PETSC_USE_CTABLE) 2980 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2981 #else 2982 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 2983 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 2984 #endif 2985 } else a->colmap = NULL; 2986 if (oldmat->garray) { 2987 PetscInt len; 2988 len = oldmat->B->cmap->n; 2989 PetscCall(PetscMalloc1(len, &a->garray)); 2990 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2991 } else a->garray = NULL; 2992 2993 /* It may happen MatDuplicate is called with a non-assembled matrix 2994 In fact, MatDuplicate only requires the matrix to be preallocated 2995 This may happen inside a DMCreateMatrix_Shell */ 2996 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2997 if (oldmat->Mvctx) { 2998 a->Mvctx = oldmat->Mvctx; 2999 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3000 } 3001 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3002 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3003 } 3004 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3005 *newmat = mat; 3006 PetscFunctionReturn(PETSC_SUCCESS); 3007 } 3008 3009 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3010 { 3011 PetscBool isbinary, ishdf5; 3012 3013 PetscFunctionBegin; 3014 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3015 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3016 /* force binary viewer to load .info file if it has not yet done so */ 3017 PetscCall(PetscViewerSetUp(viewer)); 3018 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3019 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3020 if (isbinary) { 3021 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3022 } else if (ishdf5) { 3023 #if defined(PETSC_HAVE_HDF5) 3024 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3025 #else 3026 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3027 #endif 3028 } else { 3029 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3030 } 3031 PetscFunctionReturn(PETSC_SUCCESS); 3032 } 3033 3034 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3035 { 3036 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3037 PetscInt *rowidxs, *colidxs; 3038 PetscScalar *matvals; 3039 3040 PetscFunctionBegin; 3041 PetscCall(PetscViewerSetUp(viewer)); 3042 3043 /* read in matrix header */ 3044 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3045 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3046 M = header[1]; 3047 N = header[2]; 3048 nz = header[3]; 3049 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3050 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3051 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3052 3053 /* set block sizes from the viewer's .info file */ 3054 
PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3055 /* set global sizes if not set already */ 3056 if (mat->rmap->N < 0) mat->rmap->N = M; 3057 if (mat->cmap->N < 0) mat->cmap->N = N; 3058 PetscCall(PetscLayoutSetUp(mat->rmap)); 3059 PetscCall(PetscLayoutSetUp(mat->cmap)); 3060 3061 /* check if the matrix sizes are correct */ 3062 PetscCall(MatGetSize(mat, &rows, &cols)); 3063 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3064 3065 /* read in row lengths and build row indices */ 3066 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3067 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3068 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3069 rowidxs[0] = 0; 3070 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3071 if (nz != PETSC_INT_MAX) { 3072 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3073 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3074 } 3075 3076 /* read in column indices and matrix values */ 3077 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3078 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3079 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3080 /* store matrix indices and values */ 3081 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3082 PetscCall(PetscFree(rowidxs)); 3083 PetscCall(PetscFree2(colidxs, matvals)); 3084 PetscFunctionReturn(PETSC_SUCCESS); 3085 } 3086 3087 /* Not scalable because of ISAllGather() unless getting all columns. */ 3088 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3089 { 3090 IS iscol_local; 3091 PetscBool isstride; 3092 PetscMPIInt gisstride = 0; 3093 3094 PetscFunctionBegin; 3095 /* check if we are grabbing all columns*/ 3096 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3097 3098 if (isstride) { 3099 PetscInt start, len, mstart, mlen; 3100 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3101 PetscCall(ISGetLocalSize(iscol, &len)); 3102 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3103 if (mstart == start && mlen - mstart == len) gisstride = 1; 3104 } 3105 3106 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3107 if (gisstride) { 3108 PetscInt N; 3109 PetscCall(MatGetSize(mat, NULL, &N)); 3110 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3111 PetscCall(ISSetIdentity(iscol_local)); 3112 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3113 } else { 3114 PetscInt cbs; 3115 PetscCall(ISGetBlockSize(iscol, &cbs)); 3116 PetscCall(ISAllGather(iscol, &iscol_local)); 3117 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3118 } 3119 3120 *isseq = iscol_local; 3121 PetscFunctionReturn(PETSC_SUCCESS); 3122 } 3123 3124 /* 3125 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3126 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3127 3128 Input Parameters: 3129 + mat - matrix 3130 . 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3131 i.e., mat->rstart <= isrow[i] < mat->rend 3132 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3133 i.e., mat->cstart <= iscol[i] < mat->cend 3134 3135 Output Parameters: 3136 + isrow_d - sequential row index set for retrieving mat->A 3137 . iscol_d - sequential column index set for retrieving mat->A 3138 . iscol_o - sequential column index set for retrieving mat->B 3139 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3140 */ 3141 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3142 { 3143 Vec x, cmap; 3144 const PetscInt *is_idx; 3145 PetscScalar *xarray, *cmaparray; 3146 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3148 Mat B = a->B; 3149 Vec lvec = a->lvec, lcmap; 3150 PetscInt i, cstart, cend, Bn = B->cmap->N; 3151 MPI_Comm comm; 3152 VecScatter Mvctx = a->Mvctx; 3153 3154 PetscFunctionBegin; 3155 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3156 PetscCall(ISGetLocalSize(iscol, &ncols)); 3157 3158 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3159 PetscCall(MatCreateVecs(mat, &x, NULL)); 3160 PetscCall(VecSet(x, -1.0)); 3161 PetscCall(VecDuplicate(x, &cmap)); 3162 PetscCall(VecSet(cmap, -1.0)); 3163 3164 /* Get start indices */ 3165 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3166 isstart -= ncols; 3167 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3168 3169 PetscCall(ISGetIndices(iscol, &is_idx)); 3170 PetscCall(VecGetArray(x, &xarray)); 3171 PetscCall(VecGetArray(cmap, &cmaparray)); 3172 PetscCall(PetscMalloc1(ncols, &idx)); 3173 for (i = 0; i < ncols; i++) { 3174 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3175 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3176 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3177 } 3178 PetscCall(VecRestoreArray(x, &xarray)); 3179 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3180 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3181 3182 /* Get iscol_d */ 3183 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3184 PetscCall(ISGetBlockSize(iscol, &i)); 3185 PetscCall(ISSetBlockSize(*iscol_d, i)); 3186 3187 /* Get isrow_d */ 3188 PetscCall(ISGetLocalSize(isrow, &m)); 3189 rstart = mat->rmap->rstart; 3190 PetscCall(PetscMalloc1(m, &idx)); 3191 PetscCall(ISGetIndices(isrow, &is_idx)); 3192 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3193 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3194 3195 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3196 PetscCall(ISGetBlockSize(isrow, &i)); 3197 PetscCall(ISSetBlockSize(*isrow_d, i)); 3198 3199 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3200 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3201 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3202 3203 PetscCall(VecDuplicate(lvec, &lcmap)); 3204 3205 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3206 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3207 3208 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3209 /* off-process column 
indices */ 3210 count = 0; 3211 PetscCall(PetscMalloc1(Bn, &idx)); 3212 PetscCall(PetscMalloc1(Bn, &cmap1)); 3213 3214 PetscCall(VecGetArray(lvec, &xarray)); 3215 PetscCall(VecGetArray(lcmap, &cmaparray)); 3216 for (i = 0; i < Bn; i++) { 3217 if (PetscRealPart(xarray[i]) > -1.0) { 3218 idx[count] = i; /* local column index in off-diagonal part B */ 3219 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3220 count++; 3221 } 3222 } 3223 PetscCall(VecRestoreArray(lvec, &xarray)); 3224 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3225 3226 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3227 /* cannot ensure iscol_o has same blocksize as iscol! */ 3228 3229 PetscCall(PetscFree(idx)); 3230 *garray = cmap1; 3231 3232 PetscCall(VecDestroy(&x)); 3233 PetscCall(VecDestroy(&cmap)); 3234 PetscCall(VecDestroy(&lcmap)); 3235 PetscFunctionReturn(PETSC_SUCCESS); 3236 } 3237 3238 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3239 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3240 { 3241 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3242 Mat M = NULL; 3243 MPI_Comm comm; 3244 IS iscol_d, isrow_d, iscol_o; 3245 Mat Asub = NULL, Bsub = NULL; 3246 PetscInt n, count, M_size, N_size; 3247 3248 PetscFunctionBegin; 3249 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3250 3251 if (call == MAT_REUSE_MATRIX) { 3252 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3253 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3254 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3255 3256 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3257 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3258 3259 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3260 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3261 3262 /* Update diagonal and off-diagonal portions of submat */ 3263 asub = (Mat_MPIAIJ *)(*submat)->data; 3264 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3265 PetscCall(ISGetLocalSize(iscol_o, &n)); 3266 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3267 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3268 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3269 3270 } else { /* call == MAT_INITIAL_MATRIX) */ 3271 PetscInt *garray, *garray_compact; 3272 PetscInt BsubN; 3273 3274 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3275 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3276 3277 /* Create local submatrices Asub and Bsub */ 3278 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3279 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3280 3281 // Compact garray so its not of size Bn 3282 PetscCall(ISGetSize(iscol_o, &count)); 3283 PetscCall(PetscMalloc1(count, &garray_compact)); 3284 PetscCall(PetscArraycpy(garray_compact, garray, count)); 3285 3286 /* Create submatrix M */ 3287 PetscCall(ISGetSize(isrow, &M_size)); 3288 PetscCall(ISGetSize(iscol, &N_size)); 3289 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M)); 3290 3291 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3292 asub = (Mat_MPIAIJ *)M->data; 3293 3294 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3295 n = asub->B->cmap->N; 3296 if (BsubN > n) { 3297 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3298 const PetscInt *idx; 3299 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3300 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3301 3302 PetscCall(PetscMalloc1(n, &idx_new)); 3303 j = 0; 3304 PetscCall(ISGetIndices(iscol_o, &idx)); 3305 for (i = 0; i < n; i++) { 3306 if (j >= BsubN) break; 3307 while (subgarray[i] > garray[j]) j++; 3308 3309 PetscCheck(subgarray[i] == garray[j], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3310 idx_new[i] = idx[j++]; 3311 } 3312 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3313 3314 PetscCall(ISDestroy(&iscol_o)); 3315 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3316 3317 } else PetscCheck(BsubN >= n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3318 3319 PetscCall(PetscFree(garray)); 3320 *submat = M; 3321 3322 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3323 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3324 PetscCall(ISDestroy(&isrow_d)); 3325 3326 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3327 PetscCall(ISDestroy(&iscol_d)); 3328 3329 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3330 PetscCall(ISDestroy(&iscol_o)); 3331 } 3332 PetscFunctionReturn(PETSC_SUCCESS); 3333 } 3334 3335 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3336 { 3337 IS iscol_local = NULL, isrow_d; 3338 PetscInt csize; 3339 PetscInt n, i, j, start, end; 3340 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3341 MPI_Comm comm; 3342 3343 PetscFunctionBegin; 3344 /* If isrow has same processor distribution as mat, 3345 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3346 if (call == MAT_REUSE_MATRIX) { 3347 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3348 if (isrow_d) { 3349 sameRowDist = PETSC_TRUE; 3350 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3351 } else { 3352 
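      /* No isrow_d was cached on *newmat; check whether the previous MAT_INITIAL_MATRIX call stored a sequential
         column IS ("SubIScol") instead, which indicates the rows, but not the columns, share mat's distribution */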
PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3353 if (iscol_local) { 3354 sameRowDist = PETSC_TRUE; 3355 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3356 } 3357 } 3358 } else { 3359 /* Check if isrow has same processor distribution as mat */ 3360 sameDist[0] = PETSC_FALSE; 3361 PetscCall(ISGetLocalSize(isrow, &n)); 3362 if (!n) { 3363 sameDist[0] = PETSC_TRUE; 3364 } else { 3365 PetscCall(ISGetMinMax(isrow, &i, &j)); 3366 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3367 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3368 } 3369 3370 /* Check if iscol has same processor distribution as mat */ 3371 sameDist[1] = PETSC_FALSE; 3372 PetscCall(ISGetLocalSize(iscol, &n)); 3373 if (!n) { 3374 sameDist[1] = PETSC_TRUE; 3375 } else { 3376 PetscCall(ISGetMinMax(iscol, &i, &j)); 3377 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3378 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3379 } 3380 3381 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3382 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPI_C_BOOL, MPI_LAND, comm)); 3383 sameRowDist = tsameDist[0]; 3384 } 3385 3386 if (sameRowDist) { 3387 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3388 /* isrow and iscol have same processor distribution as mat */ 3389 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3390 PetscFunctionReturn(PETSC_SUCCESS); 3391 } else { /* sameRowDist */ 3392 /* isrow has same processor distribution as mat */ 3393 if (call == MAT_INITIAL_MATRIX) { 3394 PetscBool sorted; 3395 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3396 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3397 PetscCall(ISGetSize(iscol, &i)); 3398 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3399 3400 PetscCall(ISSorted(iscol_local, &sorted)); 3401 if (sorted) { 3402 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3403 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3404 PetscFunctionReturn(PETSC_SUCCESS); 3405 } 3406 } else { /* call == MAT_REUSE_MATRIX */ 3407 IS iscol_sub; 3408 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3409 if (iscol_sub) { 3410 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3411 PetscFunctionReturn(PETSC_SUCCESS); 3412 } 3413 } 3414 } 3415 } 3416 3417 /* General case: iscol -> iscol_local which has global size of iscol */ 3418 if (call == MAT_REUSE_MATRIX) { 3419 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3420 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3421 } else { 3422 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3423 } 3424 3425 PetscCall(ISGetLocalSize(iscol, &csize)); 3426 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3427 3428 if (call == MAT_INITIAL_MATRIX) { 3429 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3430 PetscCall(ISDestroy(&iscol_local)); 3431 } 3432 PetscFunctionReturn(PETSC_SUCCESS); 3433 } 3434 3435 /*@C 3436 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using 
`MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. M      - the global row size
. N      - the global column size
. A      - "diagonal" portion of matrix
. B      - the "off-diagonal" portion of the matrix; if `garray` is `NULL`, `B` uses global column ids and has `N` columns, while if `garray` is not `NULL`, `B` uses local column ids and its number of columns equals the length of `garray`
- garray - either `NULL` or the global indices of the columns of `B`. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter.

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` and `B` become part of the output `mat`; the caller must not use or destroy `A` and `B` afterwards.

  If `garray` is `NULL`, `B` will be compacted to use local indices, so `B`'s sparsity pattern (nonzerostate) will be changed. If `B` is a device matrix, its copy on the device must also be updated.
  We do so by increasing `B`'s nonzerostate; device matrix types detect this change (see the internal routines `MatSeqAIJCUSPARSECopyToGPU()` and
  `MatAssemblyEnd_SeqAIJKokkos()`) and simply destroy and then recreate the device copy of `B`. This is not optimal, but it is easy to implement and robust. To avoid this overhead, compute `garray`
  yourself; see the algorithms in the private function `MatSetUpMultiply_MPIAIJ()`.

  Whether `garray` is `NULL` need not be collective; in other words, `garray` can be `NULL` on some processes while not on others.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
{
  PetscInt    m, n;
  MatType     mpi_mat_type;
  Mat_MPIAIJ *mpiaij;
  Mat         C;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, &C));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);

  PetscCall(MatSetSizes(C, m, n, M, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(C, mpi_mat_type));
  if (!garray) {
    const PetscScalar *ba;

    B->nonzerostate++;
    PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */
    PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
  }

  PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));

  mpiaij         = (Mat_MPIAIJ *)C->data;
  mpiaij->A      = A;
  mpiaij->B      = B;
  mpiaij->garray = garray;
  C->preallocated     = PETSC_TRUE;
  C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ.
In effect, making MatAssemblyBegin a nop */ 3503 3504 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3505 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3506 /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and 3507 also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced 3508 */ 3509 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3510 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3511 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3512 *mat = C; 3513 PetscFunctionReturn(PETSC_SUCCESS); 3514 } 3515 3516 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3517 3518 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3519 { 3520 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3521 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3522 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3523 Mat M, Msub, B = a->B; 3524 MatScalar *aa; 3525 Mat_SeqAIJ *aij; 3526 PetscInt *garray = a->garray, *colsub, Ncols; 3527 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3528 IS iscol_sub, iscmap; 3529 const PetscInt *is_idx, *cmap; 3530 PetscBool allcolumns = PETSC_FALSE; 3531 MPI_Comm comm; 3532 3533 PetscFunctionBegin; 3534 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3535 if (call == MAT_REUSE_MATRIX) { 3536 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3537 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3538 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3539 3540 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3541 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3542 3543 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3544 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3545 3546 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3547 3548 } else { /* call == MAT_INITIAL_MATRIX) */ 3549 PetscBool flg; 3550 3551 PetscCall(ISGetLocalSize(iscol, &n)); 3552 PetscCall(ISGetSize(iscol, &Ncols)); 3553 3554 /* (1) iscol -> nonscalable iscol_local */ 3555 /* Check for special case: each processor gets entire matrix columns */ 3556 PetscCall(ISIdentity(iscol_local, &flg)); 3557 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3558 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3559 if (allcolumns) { 3560 iscol_sub = iscol_local; 3561 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3562 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3563 3564 } else { 3565 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3566 PetscInt *idx, *cmap1, k; 3567 PetscCall(PetscMalloc1(Ncols, &idx)); 3568 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3569 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3570 count = 0; 3571 k = 0; 3572 for (i = 0; i < Ncols; i++) { 3573 j = is_idx[i]; 3574 if (j >= cstart && j < cend) { 3575 /* diagonal part of mat */ 3576 idx[count] = j; 3577 cmap1[count++] = i; /* column index in submat */ 3578 } else if (Bn) { 3579 /* off-diagonal part of mat */ 3580 if (j == garray[k]) { 3581 idx[count] = j; 3582 cmap1[count++] = i; /* column index in submat */ 3583 } else if (j > garray[k]) { 3584 while (j > garray[k] && k < Bn - 1) k++; 3585 if (j == garray[k]) { 3586 idx[count] = j; 3587 cmap1[count++] = i; /* column index in submat */ 3588 } 3589 } 3590 } 3591 } 3592 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3593 3594 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3595 PetscCall(ISGetBlockSize(iscol, &cbs)); 3596 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3597 3598 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3599 } 3600 3601 /* (3) Create sequential Msub */ 3602 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3603 } 3604 3605 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3606 aij = (Mat_SeqAIJ *)Msub->data; 3607 ii = aij->i; 3608 PetscCall(ISGetIndices(iscmap, &cmap)); 3609 3610 /* 3611 m - number of local rows 3612 Ncols - number of columns (same on all processors) 3613 rstart - first row in new global matrix generated 3614 */ 3615 PetscCall(MatGetSize(Msub, &m, NULL)); 3616 3617 if (call == MAT_INITIAL_MATRIX) { 3618 /* (4) Create parallel newmat */ 3619 PetscMPIInt rank, size; 3620 PetscInt csize; 3621 3622 PetscCallMPI(MPI_Comm_size(comm, &size)); 3623 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3624 3625 /* 3626 Determine the number of non-zeros in the diagonal and off-diagonal 3627 portions of the matrix in order to do correct preallocation 3628 */ 3629 3630 /* first get start and end of "diagonal" columns */ 3631 PetscCall(ISGetLocalSize(iscol, &csize)); 3632 if (csize == PETSC_DECIDE) { 3633 PetscCall(ISGetSize(isrow, &mglobal)); 3634 if (mglobal == Ncols) { /* square matrix */ 3635 nlocal = m; 3636 } else { 3637 nlocal = Ncols / size + ((Ncols % size) > rank); 3638 } 3639 } else { 3640 nlocal = csize; 3641 } 3642 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3643 rstart = rend - nlocal; 3644 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3645 3646 /* next, compute all the lengths */ 3647 jj = aij->j; 3648 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3649 olens = dlens + m; 3650 for (i = 0; i < m; i++) { 3651 jend = ii[i + 1] - ii[i]; 3652 olen = 0; 3653 dlen = 0; 3654 for (j = 0; j < jend; j++) { 3655 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3656 else dlen++; 3657 jj++; 3658 } 3659 olens[i] = olen; 3660 dlens[i] = dlen; 3661 } 3662 3663 PetscCall(ISGetBlockSize(isrow, &bs)); 3664 PetscCall(ISGetBlockSize(iscol, &cbs)); 3665 3666 PetscCall(MatCreate(comm, &M)); 3667 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3668 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3669 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3670 
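    /* dlens[i] and olens[i] count the entries of row i of Msub whose mapped column index falls inside,
       respectively outside, the new diagonal column range [rstart, rend); since these per-row arrays are
       supplied, the scalar d_nz/o_nz arguments below are ignored */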
PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3671 PetscCall(PetscFree(dlens)); 3672 3673 } else { /* call == MAT_REUSE_MATRIX */ 3674 M = *newmat; 3675 PetscCall(MatGetLocalSize(M, &i, NULL)); 3676 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3677 PetscCall(MatZeroEntries(M)); 3678 /* 3679 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3680 rather than the slower MatSetValues(). 3681 */ 3682 M->was_assembled = PETSC_TRUE; 3683 M->assembled = PETSC_FALSE; 3684 } 3685 3686 /* (5) Set values of Msub to *newmat */ 3687 PetscCall(PetscMalloc1(count, &colsub)); 3688 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3689 3690 jj = aij->j; 3691 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3692 for (i = 0; i < m; i++) { 3693 row = rstart + i; 3694 nz = ii[i + 1] - ii[i]; 3695 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3696 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3697 jj += nz; 3698 aa += nz; 3699 } 3700 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3701 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3702 3703 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3704 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3705 3706 PetscCall(PetscFree(colsub)); 3707 3708 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3709 if (call == MAT_INITIAL_MATRIX) { 3710 *newmat = M; 3711 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3712 PetscCall(MatDestroy(&Msub)); 3713 3714 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3715 PetscCall(ISDestroy(&iscol_sub)); 3716 3717 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3718 PetscCall(ISDestroy(&iscmap)); 3719 3720 if (iscol_local) { 3721 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3722 PetscCall(ISDestroy(&iscol_local)); 3723 } 3724 } 3725 PetscFunctionReturn(PETSC_SUCCESS); 3726 } 3727 3728 /* 3729 Not great since it makes two copies of the submatrix, first an SeqAIJ 3730 in local and then by concatenating the local matrices the end result. 3731 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3732 3733 This requires a sequential iscol with all indices. 
3734 */ 3735 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3736 { 3737 PetscMPIInt rank, size; 3738 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3739 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3740 Mat M, Mreuse; 3741 MatScalar *aa, *vwork; 3742 MPI_Comm comm; 3743 Mat_SeqAIJ *aij; 3744 PetscBool colflag, allcolumns = PETSC_FALSE; 3745 3746 PetscFunctionBegin; 3747 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3748 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3749 PetscCallMPI(MPI_Comm_size(comm, &size)); 3750 3751 /* Check for special case: each processor gets entire matrix columns */ 3752 PetscCall(ISIdentity(iscol, &colflag)); 3753 PetscCall(ISGetLocalSize(iscol, &n)); 3754 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3755 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3756 3757 if (call == MAT_REUSE_MATRIX) { 3758 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3759 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3760 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3761 } else { 3762 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3763 } 3764 3765 /* 3766 m - number of local rows 3767 n - number of columns (same on all processors) 3768 rstart - first row in new global matrix generated 3769 */ 3770 PetscCall(MatGetSize(Mreuse, &m, &n)); 3771 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3772 if (call == MAT_INITIAL_MATRIX) { 3773 aij = (Mat_SeqAIJ *)Mreuse->data; 3774 ii = aij->i; 3775 jj = aij->j; 3776 3777 /* 3778 Determine the number of non-zeros in the diagonal and off-diagonal 3779 portions of the matrix in order to do correct preallocation 3780 */ 3781 3782 /* first get start and end of "diagonal" columns */ 3783 if (csize == PETSC_DECIDE) { 3784 PetscCall(ISGetSize(isrow, &mglobal)); 3785 if (mglobal == n) { /* square matrix */ 3786 nlocal = m; 3787 } else { 3788 nlocal = n / size + ((n % size) > rank); 3789 } 3790 } else { 3791 nlocal = csize; 3792 } 3793 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3794 rstart = rend - nlocal; 3795 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3796 3797 /* next, compute all the lengths */ 3798 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3799 olens = dlens + m; 3800 for (i = 0; i < m; i++) { 3801 jend = ii[i + 1] - ii[i]; 3802 olen = 0; 3803 dlen = 0; 3804 for (j = 0; j < jend; j++) { 3805 if (*jj < rstart || *jj >= rend) olen++; 3806 else dlen++; 3807 jj++; 3808 } 3809 olens[i] = olen; 3810 dlens[i] = dlen; 3811 } 3812 PetscCall(MatCreate(comm, &M)); 3813 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3814 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3815 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3816 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3817 PetscCall(PetscFree(dlens)); 3818 } else { 3819 PetscInt ml, nl; 3820 3821 M = *newmat; 3822 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3823 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3824 PetscCall(MatZeroEntries(M)); 3825 /* 3826 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3827 rather than the slower MatSetValues(). 3828 */ 3829 M->was_assembled = PETSC_TRUE; 3830 M->assembled = PETSC_FALSE; 3831 } 3832 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3833 aij = (Mat_SeqAIJ *)Mreuse->data; 3834 ii = aij->i; 3835 jj = aij->j; 3836 3837 /* trigger copy to CPU if needed */ 3838 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3839 for (i = 0; i < m; i++) { 3840 row = rstart + i; 3841 nz = ii[i + 1] - ii[i]; 3842 cwork = jj; 3843 jj = PetscSafePointerPlusOffset(jj, nz); 3844 vwork = aa; 3845 aa = PetscSafePointerPlusOffset(aa, nz); 3846 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3847 } 3848 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3849 3850 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3851 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3852 *newmat = M; 3853 3854 /* save submatrix used in processor for next request */ 3855 if (call == MAT_INITIAL_MATRIX) { 3856 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3857 PetscCall(MatDestroy(&Mreuse)); 3858 } 3859 PetscFunctionReturn(PETSC_SUCCESS); 3860 } 3861 3862 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3863 { 3864 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3865 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3866 const PetscInt *JJ; 3867 PetscBool nooffprocentries; 3868 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3869 3870 PetscFunctionBegin; 3871 PetscCall(PetscLayoutSetUp(B->rmap)); 3872 PetscCall(PetscLayoutSetUp(B->cmap)); 3873 m = B->rmap->n; 3874 cstart = B->cmap->rstart; 3875 cend = B->cmap->rend; 3876 rstart = B->rmap->rstart; 3877 irstart = Ii[0]; 3878 3879 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3880 3881 if (PetscDefined(USE_DEBUG)) { 3882 for (i = 0; i < m; i++) { 3883 nnz = Ii[i + 1] - Ii[i]; 3884 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3885 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3886 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3887 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3888 } 3889 } 3890 3891 for (i = 0; i < m; i++) { 3892 nnz = Ii[i + 1] - Ii[i]; 3893 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3894 nnz_max = PetscMax(nnz_max, nnz); 3895 d = 0; 3896 for (j = 0; j < nnz; j++) { 3897 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3898 } 3899 d_nnz[i] = d; 3900 o_nnz[i] = nnz - d; 3901 } 3902 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3903 PetscCall(PetscFree2(d_nnz, o_nnz)); 3904 3905 for (i = 0; i < m; i++) { 3906 ii = i + rstart; 3907 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3908 } 3909 nooffprocentries = B->nooffprocentries; 3910 B->nooffprocentries = PETSC_TRUE; 3911 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3912 
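  /* all values above were inserted into locally owned rows, so assembly can skip the off-process
     communication; the original nooffprocentries setting is restored right after assembly */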
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3913 B->nooffprocentries = nooffprocentries; 3914 3915 /* count number of entries below block diagonal */ 3916 PetscCall(PetscFree(Aij->ld)); 3917 PetscCall(PetscCalloc1(m, &ld)); 3918 Aij->ld = ld; 3919 for (i = 0; i < m; i++) { 3920 nnz = Ii[i + 1] - Ii[i]; 3921 j = 0; 3922 while (j < nnz && J[j] < cstart) j++; 3923 ld[i] = j; 3924 if (J) J += nnz; 3925 } 3926 3927 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3928 PetscFunctionReturn(PETSC_SUCCESS); 3929 } 3930 3931 /*@ 3932 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3933 (the default parallel PETSc format). 3934 3935 Collective 3936 3937 Input Parameters: 3938 + B - the matrix 3939 . i - the indices into `j` for the start of each local row (indices start with zero) 3940 . j - the column indices for each local row (indices start with zero) 3941 - v - optional values in the matrix 3942 3943 Level: developer 3944 3945 Notes: 3946 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3947 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3948 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3949 3950 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3951 3952 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3953 3954 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3955 3956 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3957 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3958 3959 The format which is used for the sparse matrix input, is equivalent to a 3960 row-major ordering.. i.e for the following matrix, the input data expected is 3961 as shown 3962 .vb 3963 1 0 0 3964 2 0 3 P0 3965 ------- 3966 4 5 6 P1 3967 3968 Process0 [P0] rows_owned=[0,1] 3969 i = {0,1,3} [size = nrow+1 = 2+1] 3970 j = {0,0,2} [size = 3] 3971 v = {1,2,3} [size = 3] 3972 3973 Process1 [P1] rows_owned=[2] 3974 i = {0,3} [size = nrow+1 = 1+1] 3975 j = {0,1,2} [size = 3] 3976 v = {4,5,6} [size = 3] 3977 .ve 3978 3979 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 3980 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 3981 @*/ 3982 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3983 { 3984 PetscFunctionBegin; 3985 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 3986 PetscFunctionReturn(PETSC_SUCCESS); 3987 } 3988 3989 /*@ 3990 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 3991 (the default parallel PETSc format). For good matrix assembly performance 3992 the user should preallocate the matrix storage by setting the parameters 3993 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 3994 3995 Collective 3996 3997 Input Parameters: 3998 + B - the matrix 3999 . 
d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e. 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e. 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 nonzero values that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Here the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, and G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e. 34, and
  hence preallocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored.

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an m x n matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (of size m x (N-n)) constitutes the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
  CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have
         it calculated if `N` is given). For square matrices n is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
.
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4139 . j - global column indices 4140 - a - optional matrix values 4141 4142 Output Parameter: 4143 . mat - the matrix 4144 4145 Level: intermediate 4146 4147 Notes: 4148 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4149 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4150 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4151 4152 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4153 4154 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4155 4156 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4157 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4158 4159 The format which is used for the sparse matrix input, is equivalent to a 4160 row-major ordering, i.e., for the following matrix, the input data expected is 4161 as shown 4162 .vb 4163 1 0 0 4164 2 0 3 P0 4165 ------- 4166 4 5 6 P1 4167 4168 Process0 [P0] rows_owned=[0,1] 4169 i = {0,1,3} [size = nrow+1 = 2+1] 4170 j = {0,0,2} [size = 3] 4171 v = {1,2,3} [size = 3] 4172 4173 Process1 [P1] rows_owned=[2] 4174 i = {0,3} [size = nrow+1 = 1+1] 4175 j = {0,1,2} [size = 3] 4176 v = {4,5,6} [size = 3] 4177 .ve 4178 4179 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4180 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4181 @*/ 4182 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4183 { 4184 PetscFunctionBegin; 4185 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4186 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4187 PetscCall(MatCreate(comm, mat)); 4188 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4189 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4190 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4191 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4192 PetscFunctionReturn(PETSC_SUCCESS); 4193 } 4194 4195 /*@ 4196 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4197 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4198 from `MatCreateMPIAIJWithArrays()` 4199 4200 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4201 4202 Collective 4203 4204 Input Parameters: 4205 + mat - the matrix 4206 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4207 . n - This value should be the same as the local size used in creating the 4208 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4209 calculated if N is given) For square matrices n is almost always m. 4210 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4211 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4212 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4213 . J - column indices 4214 - v - matrix values 4215 4216 Level: deprecated 4217 4218 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4219 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4220 @*/ 4221 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4222 { 4223 PetscInt nnz, i; 4224 PetscBool nooffprocentries; 4225 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4226 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4227 PetscScalar *ad, *ao; 4228 PetscInt ldi, Iii, md; 4229 const PetscInt *Adi = Ad->i; 4230 PetscInt *ld = Aij->ld; 4231 4232 PetscFunctionBegin; 4233 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4234 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4235 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4236 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4237 4238 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4239 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4240 4241 for (i = 0; i < m; i++) { 4242 if (PetscDefined(USE_DEBUG)) { 4243 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4244 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4245 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4246 } 4247 } 4248 nnz = Ii[i + 1] - Ii[i]; 4249 Iii = Ii[i]; 4250 ldi = ld[i]; 4251 md = Adi[i + 1] - Adi[i]; 4252 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4253 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4254 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4255 ad += md; 4256 ao += nnz - md; 4257 } 4258 nooffprocentries = mat->nooffprocentries; 4259 mat->nooffprocentries = PETSC_TRUE; 4260 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4261 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4262 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4263 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4264 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4265 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4266 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4267 mat->nooffprocentries = nooffprocentries; 4268 PetscFunctionReturn(PETSC_SUCCESS); 4269 } 4270 4271 /*@ 4272 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4273 4274 Collective 4275 4276 Input Parameters: 4277 + mat - the matrix 4278 - v - matrix values, stored by row 4279 4280 Level: intermediate 4281 4282 Notes: 4283 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4284 4285 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4286 4287 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4288 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4289 @*/ 4290 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4291 { 4292 PetscInt nnz, i, m; 4293 PetscBool nooffprocentries; 4294 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4295 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4296 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4297 PetscScalar *ad, *ao; 4298 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4299 PetscInt ldi, Iii, md; 4300 PetscInt *ld = Aij->ld; 4301 4302 PetscFunctionBegin; 4303 m = mat->rmap->n; 4304 4305 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4306 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4307 Iii = 0; 4308 for (i = 0; i < m; i++) { 4309 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4310 ldi = ld[i]; 4311 md = Adi[i + 1] - Adi[i]; 4312 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4313 ad += md; 4314 if (ao) { 4315 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4316 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4317 ao += nnz - md; 4318 } 4319 Iii += nnz; 4320 } 4321 nooffprocentries = mat->nooffprocentries; 4322 mat->nooffprocentries = PETSC_TRUE; 4323 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4324 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4325 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4326 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4327 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4328 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4329 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4330 mat->nooffprocentries = nooffprocentries; 4331 PetscFunctionReturn(PETSC_SUCCESS); 4332 } 4333 4334 /*@ 4335 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4336 (the default parallel PETSc format). For good matrix assembly performance 4337 the user should preallocate the matrix storage by setting the parameters 4338 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4339 4340 Collective 4341 4342 Input Parameters: 4343 + comm - MPI communicator 4344 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4345 This value should be the same as the local size used in creating the 4346 y vector for the matrix-vector product y = Ax. 4347 . n - This value should be the same as the local size used in creating the 4348 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4349 calculated if N is given) For square matrices n is almost always m. 4350 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4351 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4352 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4353 (same value is used for all local rows) 4354 . d_nnz - array containing the number of nonzeros in the various rows of the 4355 DIAGONAL portion of the local submatrix (possibly different for each row) 4356 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4357 The size of this array is equal to the number of local rows, i.e 'm'. 4358 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4359 submatrix (same value is used for all local rows). 4360 - o_nnz - array containing the number of nonzeros in the various rows of the 4361 OFF-DIAGONAL portion of the local submatrix (possibly different for 4362 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4363 structure. The size of this array is equal to the number 4364 of local rows, i.e 'm'. 4365 4366 Output Parameter: 4367 . A - the matrix 4368 4369 Options Database Keys: 4370 + -mat_no_inode - Do not use inodes 4371 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4372 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4373 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4374 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4375 4376 Level: intermediate 4377 4378 Notes: 4379 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4380 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4381 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4382 4383 If the *_nnz parameter is given then the *_nz parameter is ignored 4384 4385 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4386 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4387 storage requirements for this matrix. 4388 4389 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4390 processor than it must be used on all processors that share the object for 4391 that argument. 4392 4393 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4394 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4395 4396 The user MUST specify either the local or global matrix dimensions 4397 (possibly both). 4398 4399 The parallel matrix is partitioned across processors such that the 4400 first `m0` rows belong to process 0, the next `m1` rows belong to 4401 process 1, the next `m2` rows belong to process 2, etc., where 4402 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4403 values corresponding to [m x N] submatrix. 4404 4405 The columns are logically partitioned with the n0 columns belonging 4406 to 0th partition, the next n1 columns belonging to the next 4407 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4408 4409 The DIAGONAL portion of the local submatrix on any given processor 4410 is the submatrix corresponding to the rows and columns m,n 4411 corresponding to the given processor. i.e diagonal matrix on 4412 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4413 etc. The remaining portion of the local submatrix [m x (N-n)] 4414 constitute the OFF-DIAGONAL portion. The example below better 4415 illustrates this concept. The two matrices, the DIAGONAL portion and 4416 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 
For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  When calling this routine with a single process communicator, a matrix of
  type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
  type of communicator, use the construction mechanism
.vb
  MatCreate(..., &A);
  MatSetType(A, MATMPIAIJ);
  MatSetSizes(A, m, n, M, N);
  MatMPIAIJSetPreallocation(A, ...);
.ve

  By default, this format uses inodes (identical nodes) when possible.
  We search for consecutive rows with the same nonzero structure, thereby
  reusing matrix information to achieve increased efficiency.

  Example Usage:
  Consider the following 8x8 matrix with 34 nonzero values that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
   Proc0    0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
   -------------------------------------
           13  0 14  | 15 16 17  |  0  0
   Proc1    0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
   -------------------------------------
   Proc2   25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  The submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, and G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`,`o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all of the above values, i.e. 34, and
  hence the preallocation is perfect.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
          `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not Collective

  Input Parameter:
. A - The `MATMPIAIJ` matrix

  Output Parameters:
+ Ad     - The local diagonal block as a `MATSEQAIJ` matrix
. Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

  Level: intermediate

  Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
  local column numbers to global column numbers in the original matrix.
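  Example Usage:
  A minimal sketch (variable names other than `A`, `Ad`, `Ao`, and `colmap` are illustrative) of retrieving the two
  local blocks and mapping a local column index of `Ao` back to the corresponding global column of `A`:
.vb
  Mat             Ad, Ao;
  const PetscInt *colmap;

  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap));
  /* a local column jo of Ao corresponds to global column colmap[jo] of A */
.ve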
4549 4550 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4551 @*/ 4552 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4553 { 4554 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4555 PetscBool flg; 4556 4557 PetscFunctionBegin; 4558 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4559 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4560 if (Ad) *Ad = a->A; 4561 if (Ao) *Ao = a->B; 4562 if (colmap) *colmap = a->garray; 4563 PetscFunctionReturn(PETSC_SUCCESS); 4564 } 4565 4566 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4567 { 4568 PetscInt m, N, i, rstart, nnz, Ii; 4569 PetscInt *indx; 4570 PetscScalar *values; 4571 MatType rootType; 4572 4573 PetscFunctionBegin; 4574 PetscCall(MatGetSize(inmat, &m, &N)); 4575 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4576 PetscInt *dnz, *onz, sum, bs, cbs; 4577 4578 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4579 /* Check sum(n) = N */ 4580 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4581 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4582 4583 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4584 rstart -= m; 4585 4586 MatPreallocateBegin(comm, m, n, dnz, onz); 4587 for (i = 0; i < m; i++) { 4588 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4589 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4590 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4591 } 4592 4593 PetscCall(MatCreate(comm, outmat)); 4594 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4595 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4596 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4597 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4598 PetscCall(MatSetType(*outmat, rootType)); 4599 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4600 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4601 MatPreallocateEnd(dnz, onz); 4602 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4603 } 4604 4605 /* numeric phase */ 4606 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4607 for (i = 0; i < m; i++) { 4608 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4609 Ii = i + rstart; 4610 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4611 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4612 } 4613 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4614 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4615 PetscFunctionReturn(PETSC_SUCCESS); 4616 } 4617 4618 static PetscErrorCode MatMergeSeqsToMPIDestroy(void **data) 4619 { 4620 MatMergeSeqsToMPI *merge = (MatMergeSeqsToMPI *)*data; 4621 4622 PetscFunctionBegin; 4623 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4624 PetscCall(PetscFree(merge->id_r)); 4625 PetscCall(PetscFree(merge->len_s)); 4626 PetscCall(PetscFree(merge->len_r)); 4627 PetscCall(PetscFree(merge->bi)); 4628 PetscCall(PetscFree(merge->bj)); 4629 PetscCall(PetscFree(merge->buf_ri[0])); 4630 PetscCall(PetscFree(merge->buf_ri)); 4631 PetscCall(PetscFree(merge->buf_rj[0])); 4632 PetscCall(PetscFree(merge->buf_rj)); 4633 
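  /* coi, coj and owners_co may be NULL (for example, MatCreateMPIAIJSumSeqAIJSymbolic() leaves them NULL);
     PetscFree() of a NULL pointer is a no-op, so no guards are needed here */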
PetscCall(PetscFree(merge->coi)); 4634 PetscCall(PetscFree(merge->coj)); 4635 PetscCall(PetscFree(merge->owners_co)); 4636 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4637 PetscCall(PetscFree(merge)); 4638 PetscFunctionReturn(PETSC_SUCCESS); 4639 } 4640 4641 #include <../src/mat/utils/freespace.h> 4642 #include <petscbt.h> 4643 4644 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4645 { 4646 MPI_Comm comm; 4647 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4648 PetscMPIInt size, rank, taga, *len_s; 4649 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4650 PetscMPIInt proc, k; 4651 PetscInt **buf_ri, **buf_rj; 4652 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4653 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4654 MPI_Request *s_waits, *r_waits; 4655 MPI_Status *status; 4656 const MatScalar *aa, *a_a; 4657 MatScalar **abuf_r, *ba_i; 4658 MatMergeSeqsToMPI *merge; 4659 PetscContainer container; 4660 4661 PetscFunctionBegin; 4662 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4663 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4664 4665 PetscCallMPI(MPI_Comm_size(comm, &size)); 4666 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4667 4668 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4669 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4670 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4671 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4672 aa = a_a; 4673 4674 bi = merge->bi; 4675 bj = merge->bj; 4676 buf_ri = merge->buf_ri; 4677 buf_rj = merge->buf_rj; 4678 4679 PetscCall(PetscMalloc1(size, &status)); 4680 owners = merge->rowmap->range; 4681 len_s = merge->len_s; 4682 4683 /* send and recv matrix values */ 4684 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4685 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4686 4687 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4688 for (proc = 0, k = 0; proc < size; proc++) { 4689 if (!len_s[proc]) continue; 4690 i = owners[proc]; 4691 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4692 k++; 4693 } 4694 4695 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4696 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4697 PetscCall(PetscFree(status)); 4698 4699 PetscCall(PetscFree(s_waits)); 4700 PetscCall(PetscFree(r_waits)); 4701 4702 /* insert mat values of mpimat */ 4703 PetscCall(PetscMalloc1(N, &ba_i)); 4704 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4705 4706 for (k = 0; k < merge->nrecv; k++) { 4707 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4708 nrows = *buf_ri_k[k]; 4709 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4710 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4711 } 4712 4713 /* set values of ba */ 4714 m = merge->rowmap->n; 4715 for (i = 0; i < m; i++) { 4716 arow = owners[rank] + i; 4717 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4718 bnzi = bi[i + 1] - bi[i]; 4719 PetscCall(PetscArrayzero(ba_i, bnzi)); 4720 4721 /* add local non-zero vals of this proc's seqmat into ba */ 4722 anzi = ai[arow + 1] - ai[arow]; 4723 aj = a->j + ai[arow]; 4724 aa = 
a_a + ai[arow]; 4725 nextaj = 0; 4726 for (j = 0; nextaj < anzi; j++) { 4727 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4728 ba_i[j] += aa[nextaj++]; 4729 } 4730 } 4731 4732 /* add received vals into ba */ 4733 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4734 /* i-th row */ 4735 if (i == *nextrow[k]) { 4736 anzi = *(nextai[k] + 1) - *nextai[k]; 4737 aj = buf_rj[k] + *nextai[k]; 4738 aa = abuf_r[k] + *nextai[k]; 4739 nextaj = 0; 4740 for (j = 0; nextaj < anzi; j++) { 4741 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4742 ba_i[j] += aa[nextaj++]; 4743 } 4744 } 4745 nextrow[k]++; 4746 nextai[k]++; 4747 } 4748 } 4749 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4750 } 4751 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4752 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4753 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4754 4755 PetscCall(PetscFree(abuf_r[0])); 4756 PetscCall(PetscFree(abuf_r)); 4757 PetscCall(PetscFree(ba_i)); 4758 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4759 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4760 PetscFunctionReturn(PETSC_SUCCESS); 4761 } 4762 4763 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4764 { 4765 Mat B_mpi; 4766 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4767 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4768 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4769 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4770 PetscInt len, *dnz, *onz, bs, cbs; 4771 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4772 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4773 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4774 MPI_Status *status; 4775 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4776 PetscBT lnkbt; 4777 MatMergeSeqsToMPI *merge; 4778 PetscContainer container; 4779 4780 PetscFunctionBegin; 4781 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4782 4783 /* make sure it is a PETSc comm */ 4784 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4785 PetscCallMPI(MPI_Comm_size(comm, &size)); 4786 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4787 4788 PetscCall(PetscNew(&merge)); 4789 PetscCall(PetscMalloc1(size, &status)); 4790 4791 /* determine row ownership */ 4792 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4793 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4794 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4795 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4796 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4797 PetscCall(PetscMalloc1(size, &len_si)); 4798 PetscCall(PetscMalloc1(size, &merge->len_s)); 4799 4800 m = merge->rowmap->n; 4801 owners = merge->rowmap->range; 4802 4803 /* determine the number of messages to send, their lengths */ 4804 len_s = merge->len_s; 4805 4806 len = 0; /* length of buf_si[] */ 4807 merge->nsend = 0; 4808 for (PetscMPIInt proc = 0; proc < size; proc++) { 4809 len_si[proc] = 0; 4810 if (proc == rank) { 4811 len_s[proc] = 0; 4812 } else { 4813 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4814 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4815 } 4816 if (len_s[proc]) { 4817 merge->nsend++; 4818 nrows = 0; 4819 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4820 if (ai[i + 1] > ai[i]) nrows++; 4821 } 4822 
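      /* each i-structure message carries a leading row count, nrows row indices, and nrows+1 running
         offsets, i.e. 2*(nrows+1) integers in total (see the packing of buf_si below) */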
PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4823 len += len_si[proc]; 4824 } 4825 } 4826 4827 /* determine the number and length of messages to receive for ij-structure */ 4828 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4829 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4830 4831 /* post the Irecv of j-structure */ 4832 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4833 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4834 4835 /* post the Isend of j-structure */ 4836 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4837 4838 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4839 if (!len_s[proc]) continue; 4840 i = owners[proc]; 4841 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4842 k++; 4843 } 4844 4845 /* receives and sends of j-structure are complete */ 4846 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4847 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4848 4849 /* send and recv i-structure */ 4850 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4851 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4852 4853 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4854 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4855 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4856 if (!len_s[proc]) continue; 4857 /* form outgoing message for i-structure: 4858 buf_si[0]: nrows to be sent 4859 [1:nrows]: row index (global) 4860 [nrows+1:2*nrows+1]: i-structure index 4861 */ 4862 nrows = len_si[proc] / 2 - 1; 4863 buf_si_i = buf_si + nrows + 1; 4864 buf_si[0] = nrows; 4865 buf_si_i[0] = 0; 4866 nrows = 0; 4867 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4868 anzi = ai[i + 1] - ai[i]; 4869 if (anzi) { 4870 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4871 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4872 nrows++; 4873 } 4874 } 4875 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4876 k++; 4877 buf_si += len_si[proc]; 4878 } 4879 4880 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4881 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4882 4883 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4884 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4885 4886 PetscCall(PetscFree(len_si)); 4887 PetscCall(PetscFree(len_ri)); 4888 PetscCall(PetscFree(rj_waits)); 4889 PetscCall(PetscFree2(si_waits, sj_waits)); 4890 PetscCall(PetscFree(ri_waits)); 4891 PetscCall(PetscFree(buf_s)); 4892 PetscCall(PetscFree(status)); 4893 4894 /* compute a local seq matrix in each processor */ 4895 /* allocate bi array and free space for accumulating nonzero column info */ 4896 PetscCall(PetscMalloc1(m + 1, &bi)); 4897 bi[0] = 0; 4898 4899 /* create and initialize a linked list */ 4900 nlnk = N + 1; 4901 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4902 4903 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4904 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4905 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4906 4907 current_space = 
free_space; 4908 4909 /* determine symbolic info for each local row */ 4910 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4911 4912 for (k = 0; k < merge->nrecv; k++) { 4913 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4914 nrows = *buf_ri_k[k]; 4915 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4916 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4917 } 4918 4919 MatPreallocateBegin(comm, m, n, dnz, onz); 4920 len = 0; 4921 for (i = 0; i < m; i++) { 4922 bnzi = 0; 4923 /* add local non-zero cols of this proc's seqmat into lnk */ 4924 arow = owners[rank] + i; 4925 anzi = ai[arow + 1] - ai[arow]; 4926 aj = a->j + ai[arow]; 4927 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4928 bnzi += nlnk; 4929 /* add received col data into lnk */ 4930 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4931 if (i == *nextrow[k]) { /* i-th row */ 4932 anzi = *(nextai[k] + 1) - *nextai[k]; 4933 aj = buf_rj[k] + *nextai[k]; 4934 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4935 bnzi += nlnk; 4936 nextrow[k]++; 4937 nextai[k]++; 4938 } 4939 } 4940 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4941 4942 /* if free space is not available, make more free space */ 4943 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4944 /* copy data into free space, then initialize lnk */ 4945 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4946 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 4947 4948 current_space->array += bnzi; 4949 current_space->local_used += bnzi; 4950 current_space->local_remaining -= bnzi; 4951 4952 bi[i + 1] = bi[i] + bnzi; 4953 } 4954 4955 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4956 4957 PetscCall(PetscMalloc1(bi[m], &bj)); 4958 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4959 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4960 4961 /* create symbolic parallel matrix B_mpi */ 4962 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4963 PetscCall(MatCreate(comm, &B_mpi)); 4964 if (n == PETSC_DECIDE) { 4965 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4966 } else { 4967 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4968 } 4969 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 4970 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 4971 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 4972 MatPreallocateEnd(dnz, onz); 4973 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 4974 4975 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4976 B_mpi->assembled = PETSC_FALSE; 4977 merge->bi = bi; 4978 merge->bj = bj; 4979 merge->buf_ri = buf_ri; 4980 merge->buf_rj = buf_rj; 4981 merge->coi = NULL; 4982 merge->coj = NULL; 4983 merge->owners_co = NULL; 4984 4985 PetscCall(PetscCommDestroy(&comm)); 4986 4987 /* attach the supporting struct to B_mpi for reuse */ 4988 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 4989 PetscCall(PetscContainerSetPointer(container, merge)); 4990 PetscCall(PetscContainerSetCtxDestroy(container, MatMergeSeqsToMPIDestroy)); 4991 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 4992 PetscCall(PetscContainerDestroy(&container)); 4993 *mpimat = B_mpi; 4994 4995 
PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
  matrices from each processor

  Collective

  Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix
. m      - number of local rows (or `PETSC_DECIDE`)
. n      - number of local columns (or `PETSC_DECIDE`)
- scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
. mpimat - the parallel matrix generated

  Level: advanced

  Note:
  The dimensions of the sequential matrix on each processor MUST be the same.
  The input `seqmat` is included in the container composed on `mpimat` under the name "MatMergeSeqsToMPI", and will be
  destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.

.seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) {
    PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
    } else {
      PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
    }
    PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
  if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
  PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
  PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
  with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
  `n` is the global column count obtained with `MatGetSize()`.

  In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.

  For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.

  Destroy the matrix with `MatDestroy()`

.seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
{
  PetscBool mpi;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
  if (mpi) {
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
  } else {
    *A_loc = A;
    PetscCall(PetscObjectReference((PetscObject)*A_loc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5090 5091 Not Collective 5092 5093 Input Parameters: 5094 + A - the matrix 5095 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5096 5097 Output Parameter: 5098 . A_loc - the local sequential matrix generated 5099 5100 Level: developer 5101 5102 Notes: 5103 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5104 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5105 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5106 5107 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5108 5109 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5110 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5111 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5112 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5113 5114 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5115 @*/ 5116 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5117 { 5118 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5119 Mat_SeqAIJ *mat, *a, *b; 5120 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5121 const PetscScalar *aa, *ba, *aav, *bav; 5122 PetscScalar *ca, *cam; 5123 PetscMPIInt size; 5124 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5125 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5126 PetscBool match; 5127 5128 PetscFunctionBegin; 5129 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5130 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5131 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5132 if (size == 1) { 5133 if (scall == MAT_INITIAL_MATRIX) { 5134 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5135 *A_loc = mpimat->A; 5136 } else if (scall == MAT_REUSE_MATRIX) { 5137 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5138 } 5139 PetscFunctionReturn(PETSC_SUCCESS); 5140 } 5141 5142 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5143 a = (Mat_SeqAIJ *)mpimat->A->data; 5144 b = (Mat_SeqAIJ *)mpimat->B->data; 5145 ai = a->i; 5146 aj = a->j; 5147 bi = b->i; 5148 bj = b->j; 5149 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5150 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5151 aa = aav; 5152 ba = bav; 5153 if (scall == MAT_INITIAL_MATRIX) { 5154 PetscCall(PetscMalloc1(1 + am, &ci)); 5155 ci[0] = 0; 5156 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5157 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5158 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5159 k = 0; 5160 for (i = 0; i < am; i++) { 5161 ncols_o = bi[i + 1] - bi[i]; 5162 ncols_d = ai[i + 1] - ai[i]; 5163 /* off-diagonal portion of A */ 5164 for (jo = 0; jo < ncols_o; jo++) { 5165 col = cmap[*bj]; 5166 if (col >= cstart) break; 5167 cj[k] = col; 5168 bj++; 5169 ca[k++] = *ba++; 5170 } 5171 /* diagonal portion of A */ 5172 for (j = 0; j < ncols_d; j++) { 5173 cj[k] = cstart + *aj++; 5174 ca[k++] = 
*aa++; 5175 } 5176 /* off-diagonal portion of A */ 5177 for (j = jo; j < ncols_o; j++) { 5178 cj[k] = cmap[*bj++]; 5179 ca[k++] = *ba++; 5180 } 5181 } 5182 /* put together the new matrix */ 5183 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5184 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5185 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5186 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5187 mat->free_a = PETSC_TRUE; 5188 mat->free_ij = PETSC_TRUE; 5189 mat->nonew = 0; 5190 } else if (scall == MAT_REUSE_MATRIX) { 5191 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5192 ci = mat->i; 5193 cj = mat->j; 5194 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5195 for (i = 0; i < am; i++) { 5196 /* off-diagonal portion of A */ 5197 ncols_o = bi[i + 1] - bi[i]; 5198 for (jo = 0; jo < ncols_o; jo++) { 5199 col = cmap[*bj]; 5200 if (col >= cstart) break; 5201 *cam++ = *ba++; 5202 bj++; 5203 } 5204 /* diagonal portion of A */ 5205 ncols_d = ai[i + 1] - ai[i]; 5206 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5207 /* off-diagonal portion of A */ 5208 for (j = jo; j < ncols_o; j++) { 5209 *cam++ = *ba++; 5210 bj++; 5211 } 5212 } 5213 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5214 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5215 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5216 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5217 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5218 PetscFunctionReturn(PETSC_SUCCESS); 5219 } 5220 5221 /*@ 5222 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5223 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5224 5225 Not Collective 5226 5227 Input Parameters: 5228 + A - the matrix 5229 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5230 5231 Output Parameters: 5232 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5233 - A_loc - the local sequential matrix generated 5234 5235 Level: developer 5236 5237 Note: 5238 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5239 part, then those associated with the off-diagonal part (in its local ordering) 5240 5241 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5242 @*/ 5243 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5244 { 5245 Mat Ao, Ad; 5246 const PetscInt *cmap; 5247 PetscMPIInt size; 5248 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5249 5250 PetscFunctionBegin; 5251 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5252 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5253 if (size == 1) { 5254 if (scall == MAT_INITIAL_MATRIX) { 5255 PetscCall(PetscObjectReference((PetscObject)Ad)); 5256 *A_loc = Ad; 5257 } else if (scall == MAT_REUSE_MATRIX) { 5258 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5259 } 5260 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5261 PetscFunctionReturn(PETSC_SUCCESS); 5262 } 5263 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5264 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5265 if (f) { 5266 PetscCall((*f)(A, scall, glob, A_loc)); 5267 } else { 5268 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5269 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5270 Mat_SeqAIJ *c; 5271 PetscInt *ai = a->i, *aj = a->j; 5272 PetscInt *bi = b->i, *bj = b->j; 5273 PetscInt *ci, *cj; 5274 const PetscScalar *aa, *ba; 5275 PetscScalar *ca; 5276 PetscInt i, j, am, dn, on; 5277 5278 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5279 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5280 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5281 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5282 if (scall == MAT_INITIAL_MATRIX) { 5283 PetscInt k; 5284 PetscCall(PetscMalloc1(1 + am, &ci)); 5285 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5286 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5287 ci[0] = 0; 5288 for (i = 0, k = 0; i < am; i++) { 5289 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5290 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5291 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5292 /* diagonal portion of A */ 5293 for (j = 0; j < ncols_d; j++, k++) { 5294 cj[k] = *aj++; 5295 ca[k] = *aa++; 5296 } 5297 /* off-diagonal portion of A */ 5298 for (j = 0; j < ncols_o; j++, k++) { 5299 cj[k] = dn + *bj++; 5300 ca[k] = *ba++; 5301 } 5302 } 5303 /* put together the new matrix */ 5304 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5305 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5306 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5307 c = (Mat_SeqAIJ *)(*A_loc)->data; 5308 c->free_a = PETSC_TRUE; 5309 c->free_ij = PETSC_TRUE; 5310 c->nonew = 0; 5311 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5312 } else if (scall == MAT_REUSE_MATRIX) { 5313 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5314 for (i = 0; i < am; i++) { 5315 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5316 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5317 /* diagonal portion of A */ 5318 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5319 /* off-diagonal portion of A */ 5320 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5321 } 5322 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5323 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5324 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5325 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5326 if (glob) { 5327 PetscInt cst, *gidx; 5328 5329 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5330 PetscCall(PetscMalloc1(dn + on, &gidx)); 5331 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5332 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5333 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5334 } 5335 } 5336 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5337 PetscFunctionReturn(PETSC_SUCCESS); 5338 } 5339 5340 /*@C 5341 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5342 5343 Not Collective 5344 5345 Input Parameters: 5346 + A - the matrix 5347 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5348 . row - index set of rows to extract (or `NULL`) 5349 - col - index set of columns to extract (or `NULL`) 5350 5351 Output Parameter: 5352 . A_loc - the local sequential matrix generated 5353 5354 Level: developer 5355 5356 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5357 @*/ 5358 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5359 { 5360 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5361 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5362 IS isrowa, iscola; 5363 Mat *aloc; 5364 PetscBool match; 5365 5366 PetscFunctionBegin; 5367 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5368 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5369 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5370 if (!row) { 5371 start = A->rmap->rstart; 5372 end = A->rmap->rend; 5373 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5374 } else { 5375 isrowa = *row; 5376 } 5377 if (!col) { 5378 start = A->cmap->rstart; 5379 cmap = a->garray; 5380 nzA = a->A->cmap->n; 5381 nzB = a->B->cmap->n; 5382 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5383 ncols = 0; 5384 for (i = 0; i < nzB; i++) { 5385 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5386 else break; 5387 } 5388 imark = i; 5389 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5390 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5391 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5392 } else { 5393 iscola = *col; 5394 } 5395 if (scall != MAT_INITIAL_MATRIX) { 5396 PetscCall(PetscMalloc1(1, &aloc)); 5397 aloc[0] = *A_loc; 5398 } 5399 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5400 if (!col) { /* attach global id of condensed columns */ 5401 
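      /* the compose takes its own reference to iscola, so the IS stays attached to aloc[0] even after
         the ISDestroy() below */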
PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5402 } 5403 *A_loc = aloc[0]; 5404 PetscCall(PetscFree(aloc)); 5405 if (!row) PetscCall(ISDestroy(&isrowa)); 5406 if (!col) PetscCall(ISDestroy(&iscola)); 5407 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5408 PetscFunctionReturn(PETSC_SUCCESS); 5409 } 5410 5411 /* 5412 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5413 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5414 * on a global size. 5415 * */ 5416 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5417 { 5418 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5419 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5420 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5421 PetscMPIInt owner; 5422 PetscSFNode *iremote, *oiremote; 5423 const PetscInt *lrowindices; 5424 PetscSF sf, osf; 5425 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5426 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5427 MPI_Comm comm; 5428 ISLocalToGlobalMapping mapping; 5429 const PetscScalar *pd_a, *po_a; 5430 5431 PetscFunctionBegin; 5432 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5433 /* plocalsize is the number of roots 5434 * nrows is the number of leaves 5435 * */ 5436 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5437 PetscCall(ISGetLocalSize(rows, &nrows)); 5438 PetscCall(PetscCalloc1(nrows, &iremote)); 5439 PetscCall(ISGetIndices(rows, &lrowindices)); 5440 for (i = 0; i < nrows; i++) { 5441 /* Find a remote index and an owner for a row 5442 * The row could be local or remote 5443 * */ 5444 owner = 0; 5445 lidx = 0; 5446 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5447 iremote[i].index = lidx; 5448 iremote[i].rank = owner; 5449 } 5450 /* Create SF to communicate how many nonzero columns for each row */ 5451 PetscCall(PetscSFCreate(comm, &sf)); 5452 /* SF will figure out the number of nonzero columns for each row, and their 5453 * offsets 5454 * */ 5455 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5456 PetscCall(PetscSFSetFromOptions(sf)); 5457 PetscCall(PetscSFSetUp(sf)); 5458 5459 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5460 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5461 PetscCall(PetscCalloc1(nrows, &pnnz)); 5462 roffsets[0] = 0; 5463 roffsets[1] = 0; 5464 for (i = 0; i < plocalsize; i++) { 5465 /* diagonal */ 5466 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5467 /* off-diagonal */ 5468 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5469 /* compute offsets so that we relative location for each row */ 5470 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5471 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5472 } 5473 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5474 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5475 /* 'r' means root, and 'l' means leaf */ 5476 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5477 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5478 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5479 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5480 PetscCall(PetscSFDestroy(&sf)); 5481 
PetscCall(PetscFree(roffsets)); 5482 PetscCall(PetscFree(nrcols)); 5483 dntotalcols = 0; 5484 ontotalcols = 0; 5485 ncol = 0; 5486 for (i = 0; i < nrows; i++) { 5487 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5488 ncol = PetscMax(pnnz[i], ncol); 5489 /* diagonal */ 5490 dntotalcols += nlcols[i * 2 + 0]; 5491 /* off-diagonal */ 5492 ontotalcols += nlcols[i * 2 + 1]; 5493 } 5494 /* We do not need to figure the right number of columns 5495 * since all the calculations will be done by going through the raw data 5496 * */ 5497 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5498 PetscCall(MatSetUp(*P_oth)); 5499 PetscCall(PetscFree(pnnz)); 5500 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5501 /* diagonal */ 5502 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5503 /* off-diagonal */ 5504 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5505 /* diagonal */ 5506 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5507 /* off-diagonal */ 5508 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5509 dntotalcols = 0; 5510 ontotalcols = 0; 5511 ntotalcols = 0; 5512 for (i = 0; i < nrows; i++) { 5513 owner = 0; 5514 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5515 /* Set iremote for diag matrix */ 5516 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5517 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5518 iremote[dntotalcols].rank = owner; 5519 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5520 ilocal[dntotalcols++] = ntotalcols++; 5521 } 5522 /* off-diagonal */ 5523 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5524 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5525 oiremote[ontotalcols].rank = owner; 5526 oilocal[ontotalcols++] = ntotalcols++; 5527 } 5528 } 5529 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5530 PetscCall(PetscFree(loffsets)); 5531 PetscCall(PetscFree(nlcols)); 5532 PetscCall(PetscSFCreate(comm, &sf)); 5533 /* P serves as roots and P_oth is leaves 5534 * Diag matrix 5535 * */ 5536 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5537 PetscCall(PetscSFSetFromOptions(sf)); 5538 PetscCall(PetscSFSetUp(sf)); 5539 5540 PetscCall(PetscSFCreate(comm, &osf)); 5541 /* off-diagonal */ 5542 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5543 PetscCall(PetscSFSetFromOptions(osf)); 5544 PetscCall(PetscSFSetUp(osf)); 5545 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5546 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5547 /* operate on the matrix internal data to save memory */ 5548 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5549 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5550 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5551 /* Convert to global indices for diag matrix */ 5552 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5553 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5554 /* We want P_oth store global indices */ 5555 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5556 /* Use memory scalable approach */ 5557 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5558 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5559 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5560 
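  /* complete the diagonal j-structure broadcast before pd->j is shifted back to local indices below */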
PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5561 /* Convert back to local indices */ 5562 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5563 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5564 nout = 0; 5565 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5566 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5567 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5568 /* Exchange values */ 5569 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5570 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5571 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5572 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5573 /* Stop PETSc from shrinking memory */ 5574 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5575 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5576 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5577 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5578 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5579 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5580 PetscCall(PetscSFDestroy(&sf)); 5581 PetscCall(PetscSFDestroy(&osf)); 5582 PetscFunctionReturn(PETSC_SUCCESS); 5583 } 5584 5585 /* 5586 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5587 * This supports MPIAIJ and MAIJ 5588 * */ 5589 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5590 { 5591 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5592 Mat_SeqAIJ *p_oth; 5593 IS rows, map; 5594 PetscHMapI hamp; 5595 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5596 MPI_Comm comm; 5597 PetscSF sf, osf; 5598 PetscBool has; 5599 5600 PetscFunctionBegin; 5601 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5602 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5603 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5604 * and then create a submatrix (that often is an overlapping matrix) 5605 * */ 5606 if (reuse == MAT_INITIAL_MATRIX) { 5607 /* Use a hash table to figure out unique keys */ 5608 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5609 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5610 count = 0; 5611 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5612 for (i = 0; i < a->B->cmap->n; i++) { 5613 key = a->garray[i] / dof; 5614 PetscCall(PetscHMapIHas(hamp, key, &has)); 5615 if (!has) { 5616 mapping[i] = count; 5617 PetscCall(PetscHMapISet(hamp, key, count++)); 5618 } else { 5619 /* Current 'i' has the same value the previous step */ 5620 mapping[i] = count - 1; 5621 } 5622 } 5623 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5624 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5625 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5626 PetscCall(PetscCalloc1(htsize, &rowindices)); 5627 off = 0; 5628 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5629 PetscCall(PetscHMapIDestroy(&hamp)); 5630 PetscCall(PetscSortInt(htsize, rowindices)); 5631 PetscCall(ISCreateGeneral(comm, htsize, 
rowindices, PETSC_OWN_POINTER, &rows)); 5632 /* In case, the matrix was already created but users want to recreate the matrix */ 5633 PetscCall(MatDestroy(P_oth)); 5634 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5635 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5636 PetscCall(ISDestroy(&map)); 5637 PetscCall(ISDestroy(&rows)); 5638 } else if (reuse == MAT_REUSE_MATRIX) { 5639 /* If matrix was already created, we simply update values using SF objects 5640 * that as attached to the matrix earlier. 5641 */ 5642 const PetscScalar *pd_a, *po_a; 5643 5644 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5645 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5646 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5647 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5648 /* Update values in place */ 5649 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5650 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5651 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5652 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5653 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5654 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5655 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5656 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5657 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5658 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5659 PetscFunctionReturn(PETSC_SUCCESS); 5660 } 5661 5662 /*@C 5663 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5664 5665 Collective 5666 5667 Input Parameters: 5668 + A - the first matrix in `MATMPIAIJ` format 5669 . B - the second matrix in `MATMPIAIJ` format 5670 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5671 5672 Output Parameters: 5673 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5674 . 
colb - on input, the index set of columns of `B` to extract (or `NULL`); modified on output 5675 - B_seq - the sequential matrix generated 5676 5677 Level: developer 5678 5679 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5680 @*/ 5681 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5682 { 5683 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5684 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5685 IS isrowb, iscolb; 5686 Mat *bseq = NULL; 5687 5688 PetscFunctionBegin; 5689 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5690 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5691 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5692 5693 if (scall == MAT_INITIAL_MATRIX) { 5694 start = A->cmap->rstart; 5695 cmap = a->garray; 5696 nzA = a->A->cmap->n; 5697 nzB = a->B->cmap->n; 5698 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5699 ncols = 0; 5700 for (i = 0; i < nzB; i++) { /* row < local row index */ 5701 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5702 else break; 5703 } 5704 imark = i; 5705 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5706 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5707 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5708 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5709 } else { 5710 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5711 isrowb = *rowb; 5712 iscolb = *colb; 5713 PetscCall(PetscMalloc1(1, &bseq)); 5714 bseq[0] = *B_seq; 5715 } 5716 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5717 *B_seq = bseq[0]; 5718 PetscCall(PetscFree(bseq)); 5719 if (!rowb) { 5720 PetscCall(ISDestroy(&isrowb)); 5721 } else { 5722 *rowb = isrowb; 5723 } 5724 if (!colb) { 5725 PetscCall(ISDestroy(&iscolb)); 5726 } else { 5727 *colb = iscolb; 5728 } 5729 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5730 PetscFunctionReturn(PETSC_SUCCESS); 5731 } 5732 5733 /* 5734 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns 5735 of the OFF-DIAGONAL portion of the local A 5736 5737 Collective 5738 5739 Input Parameters: 5740 + A,B - the matrices in `MATMPIAIJ` format 5741 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5742 5743 Output Parameters: 5744 + startsj_s - starting point in B's sending j-arrays, saved for `MAT_REUSE_MATRIX` (or NULL) 5745 . startsj_r - starting point in B's receiving j-arrays, saved for `MAT_REUSE_MATRIX` (or NULL) 5746 . bufa_ptr - array for sending matrix values, saved for `MAT_REUSE_MATRIX` (or NULL) 5747 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5748 5749 Developer Note: 5750 This directly accesses information inside the VecScatter associated with the matrix-vector product 5751 for this matrix. This is not desirable.
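   Illustrative call sequence (a sketch based only on this function's signature, not on any particular caller):
   a product algorithm typically passes `MAT_INITIAL_MATRIX` once to create B_oth along with the communication
   metadata, and later passes `MAT_REUSE_MATRIX` to refresh only the numerical values,

     PetscInt  *startsj_s = NULL, *startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat        B_oth = NULL;
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
     ... numerical values of B change ...
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));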
5752 5753 Level: developer 5754 5755 */ 5756 5757 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5758 { 5759 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5760 VecScatter ctx; 5761 MPI_Comm comm; 5762 const PetscMPIInt *rprocs, *sprocs; 5763 PetscMPIInt nrecvs, nsends; 5764 const PetscInt *srow, *rstarts, *sstarts; 5765 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5766 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5767 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5768 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5769 PetscMPIInt size, tag, rank, nreqs; 5770 5771 PetscFunctionBegin; 5772 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5773 PetscCallMPI(MPI_Comm_size(comm, &size)); 5774 5775 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5776 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5777 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5778 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5779 5780 if (size == 1) { 5781 startsj_s = NULL; 5782 bufa_ptr = NULL; 5783 *B_oth = NULL; 5784 PetscFunctionReturn(PETSC_SUCCESS); 5785 } 5786 5787 ctx = a->Mvctx; 5788 tag = ((PetscObject)ctx)->tag; 5789 5790 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5791 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5792 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5793 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5794 PetscCall(PetscMalloc1(nreqs, &reqs)); 5795 rwaits = reqs; 5796 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5797 5798 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5799 if (scall == MAT_INITIAL_MATRIX) { 5800 /* i-array */ 5801 /* post receives */ 5802 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5803 for (i = 0; i < nrecvs; i++) { 5804 rowlen = rvalues + rstarts[i] * rbs; 5805 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5806 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5807 } 5808 5809 /* pack the outgoing message */ 5810 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5811 5812 sstartsj[0] = 0; 5813 rstartsj[0] = 0; 5814 len = 0; /* total length of j or a array to be sent */ 5815 if (nsends) { 5816 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5817 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5818 } 5819 for (i = 0; i < nsends; i++) { 5820 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5821 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5822 for (j = 0; j < nrows; j++) { 5823 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5824 for (l = 0; l < sbs; l++) { 5825 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5826 5827 rowlen[j * sbs + l] = ncols; 5828 5829 len += ncols; 5830 
PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5831 } 5832 k++; 5833 } 5834 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5835 5836 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5837 } 5838 /* recvs and sends of i-array are completed */ 5839 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5840 PetscCall(PetscFree(svalues)); 5841 5842 /* allocate buffers for sending j and a arrays */ 5843 PetscCall(PetscMalloc1(len, &bufj)); 5844 PetscCall(PetscMalloc1(len, &bufa)); 5845 5846 /* create i-array of B_oth */ 5847 PetscCall(PetscMalloc1(aBn + 1, &b_othi)); 5848 5849 b_othi[0] = 0; 5850 len = 0; /* total length of j or a array to be received */ 5851 k = 0; 5852 for (i = 0; i < nrecvs; i++) { 5853 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5854 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5855 for (j = 0; j < nrows; j++) { 5856 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5857 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5858 k++; 5859 } 5860 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5861 } 5862 PetscCall(PetscFree(rvalues)); 5863 5864 /* allocate space for j and a arrays of B_oth */ 5865 PetscCall(PetscMalloc1(b_othi[aBn], &b_othj)); 5866 PetscCall(PetscMalloc1(b_othi[aBn], &b_otha)); 5867 5868 /* j-array */ 5869 /* post receives of j-array */ 5870 for (i = 0; i < nrecvs; i++) { 5871 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5872 PetscCallMPI(MPIU_Irecv(PetscSafePointerPlusOffset(b_othj, rstartsj[i]), nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5873 } 5874 5875 /* pack the outgoing message j-array */ 5876 if (nsends) k = sstarts[0]; 5877 for (i = 0; i < nsends; i++) { 5878 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5879 bufJ = PetscSafePointerPlusOffset(bufj, sstartsj[i]); 5880 for (j = 0; j < nrows; j++) { 5881 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5882 for (ll = 0; ll < sbs; ll++) { 5883 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5884 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5885 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5886 } 5887 } 5888 PetscCallMPI(MPIU_Isend(PetscSafePointerPlusOffset(bufj, sstartsj[i]), sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5889 } 5890 5891 /* recvs and sends of j-array are completed */ 5892 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5893 } else if (scall == MAT_REUSE_MATRIX) { 5894 sstartsj = *startsj_s; 5895 rstartsj = *startsj_r; 5896 bufa = *bufa_ptr; 5897 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5898 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5899 5900 /* a-array */ 5901 /* post receives of a-array */ 5902 for (i = 0; i < nrecvs; i++) { 5903 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5904 PetscCallMPI(MPIU_Irecv(PetscSafePointerPlusOffset(b_otha, rstartsj[i]), nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5905 } 5906 5907 /* pack the outgoing message a-array */ 5908 if (nsends) k = sstarts[0]; 5909 for (i = 0; i < nsends; i++) { 5910 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5911 bufA = PetscSafePointerPlusOffset(bufa, sstartsj[i]); 5912 for (j = 0; j < nrows; j++) { 5913 row = srow[k++] + B->rmap->range[rank]; /* global row idx 
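(srow[] stores row indices local to this rank; adding B->rmap->range[rank], the first global row owned by this rank, converts them to global indices)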
*/ 5914 for (ll = 0; ll < sbs; ll++) { 5915 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5916 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5917 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5918 } 5919 } 5920 PetscCallMPI(MPIU_Isend(PetscSafePointerPlusOffset(bufa, sstartsj[i]), sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5921 } 5922 /* recvs and sends of a-array are completed */ 5923 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5924 PetscCall(PetscFree(reqs)); 5925 5926 if (scall == MAT_INITIAL_MATRIX) { 5927 Mat_SeqAIJ *b_oth; 5928 5929 /* put together the new matrix */ 5930 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5931 5932 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5933 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5934 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5935 b_oth->free_a = PETSC_TRUE; 5936 b_oth->free_ij = PETSC_TRUE; 5937 b_oth->nonew = 0; 5938 5939 PetscCall(PetscFree(bufj)); 5940 if (!startsj_s || !bufa_ptr) { 5941 PetscCall(PetscFree2(sstartsj, rstartsj)); 5942 PetscCall(PetscFree(bufa)); 5943 } else { 5944 *startsj_s = sstartsj; 5945 *startsj_r = rstartsj; 5946 *bufa_ptr = bufa; 5947 } 5948 } else if (scall == MAT_REUSE_MATRIX) { 5949 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5950 } 5951 5952 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5953 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5954 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5955 PetscFunctionReturn(PETSC_SUCCESS); 5956 } 5957 5958 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5959 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5961 #if defined(PETSC_HAVE_MKL_SPARSE) 5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5963 #endif 5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5965 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5966 #if defined(PETSC_HAVE_ELEMENTAL) 5967 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5968 #endif 5969 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) 5970 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5971 #endif 5972 #if defined(PETSC_HAVE_HYPRE) 5973 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 5974 #endif 5975 #if defined(PETSC_HAVE_CUDA) 5976 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 5977 #endif 5978 #if defined(PETSC_HAVE_HIP) 5979 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 5980 #endif 5981 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5982 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 5983 #endif 5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 5985 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 5986 PETSC_INTERN
PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5987 5988 /* 5989 Computes (B'*A')' since computing B*A directly is untenable 5990 5991 n p p 5992 [ ] [ ] [ ] 5993 m [ A ] * n [ B ] = m [ C ] 5994 [ ] [ ] [ ] 5995 5996 */ 5997 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 5998 { 5999 Mat At, Bt, Ct; 6000 6001 PetscFunctionBegin; 6002 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6003 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6004 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6005 PetscCall(MatDestroy(&At)); 6006 PetscCall(MatDestroy(&Bt)); 6007 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6008 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6009 PetscCall(MatDestroy(&Ct)); 6010 PetscFunctionReturn(PETSC_SUCCESS); 6011 } 6012 6013 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6014 { 6015 PetscBool cisdense; 6016 6017 PetscFunctionBegin; 6018 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6019 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6020 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6021 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6022 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6023 PetscCall(MatSetUp(C)); 6024 6025 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6026 PetscFunctionReturn(PETSC_SUCCESS); 6027 } 6028 6029 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6030 { 6031 Mat_Product *product = C->product; 6032 Mat A = product->A, B = product->B; 6033 6034 PetscFunctionBegin; 6035 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6036 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6037 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6038 C->ops->productsymbolic = MatProductSymbolic_AB; 6039 PetscFunctionReturn(PETSC_SUCCESS); 6040 } 6041 6042 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6043 { 6044 Mat_Product *product = C->product; 6045 6046 PetscFunctionBegin; 6047 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6048 PetscFunctionReturn(PETSC_SUCCESS); 6049 } 6050 6051 /* 6052 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6053 6054 Input Parameters: 6055 6056 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6057 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6058 6059 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6060 6061 For Set1, j1[] contains column indices of the nonzeros. 6062 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6063 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6064 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6065 6066 Similarly for Set2. 6067 6068 This routine merges the two sets of nonzeros row by row and removes repeats.
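   A small worked example for one row (the numbers are invented for this comment): if the unique columns of Set1 in the row are {2,5,7} and those of Set2 are {5,8}, the merged row is {2,5,7,8}; the corresponding entries of the output arrays described below are imap1[] = {0,1,2} and imap2[] = {1,3}, since column 5 of Set2 is the 1st (0-based) unique nonzero of the merged row and column 8 is the 3rd.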
6069 6070 Output Parameters: (memory is allocated by the caller) 6071 6072 i[],j[]: the CSR of the merged matrix, which has m rows. 6073 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix. 6074 imap2[]: similar to imap1[], but for Set2. 6075 Note we order nonzeros row-by-row and from left to right. 6076 */ 6077 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6078 { 6079 PetscInt r, m; /* Row index of mat */ 6080 PetscCount t, t1, t2, b1, e1, b2, e2; 6081 6082 PetscFunctionBegin; 6083 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6084 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2, and the merged matrix, respectively */ 6085 i[0] = 0; 6086 for (r = 0; r < m; r++) { /* Do row by row merging */ 6087 b1 = rowBegin1[r]; 6088 e1 = rowEnd1[r]; 6089 b2 = rowBegin2[r]; 6090 e2 = rowEnd2[r]; 6091 while (b1 < e1 && b2 < e2) { 6092 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6093 j[t] = j1[b1]; 6094 imap1[t1] = t; 6095 imap2[t2] = t; 6096 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero in Set1 */ 6097 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero in Set2 */ 6098 t1++; 6099 t2++; 6100 t++; 6101 } else if (j1[b1] < j2[b2]) { 6102 j[t] = j1[b1]; 6103 imap1[t1] = t; 6104 b1 += jmap1[t1 + 1] - jmap1[t1]; 6105 t1++; 6106 t++; 6107 } else { 6108 j[t] = j2[b2]; 6109 imap2[t2] = t; 6110 b2 += jmap2[t2 + 1] - jmap2[t2]; 6111 t2++; 6112 t++; 6113 } 6114 } 6115 /* Merge the remaining in either j1[] or j2[] */ 6116 while (b1 < e1) { 6117 j[t] = j1[b1]; 6118 imap1[t1] = t; 6119 b1 += jmap1[t1 + 1] - jmap1[t1]; 6120 t1++; 6121 t++; 6122 } 6123 while (b2 < e2) { 6124 j[t] = j2[b2]; 6125 imap2[t2] = t; 6126 b2 += jmap2[t2 + 1] - jmap2[t2]; 6127 t2++; 6128 t++; 6129 } 6130 PetscCall(PetscIntCast(t, i + r + 1)); 6131 } 6132 PetscFunctionReturn(PETSC_SUCCESS); 6133 } 6134 6135 /* 6136 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6137 6138 Input Parameters: 6139 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6140 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6141 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6142 6143 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6144 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6145 6146 Output Parameters: 6147 j[],perm[]: the routine sorts j[] within each row and applies the same reordering to perm[]. 6148 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6149 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6150 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6151 6152 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6153 Atot: number of entries belonging to the diagonal block.
Annz: number of unique nonzeros belonging to the diagonal block. 6155 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6156 repeats (i.e., same 'i,j' pair). 6157 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6158 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6159 6163 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6164 6165 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6166 */ 6167 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6168 { 6169 PetscInt cstart, cend, rstart, rend, row, col; 6170 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6171 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6172 PetscCount k, m, p, q, r, s, mid; 6173 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6174 6175 PetscFunctionBegin; 6176 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6177 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6178 m = rend - rstart; 6179 6180 /* Skip negative rows */ 6181 for (k = 0; k < n; k++) 6182 if (i[k] >= 0) break; 6183 6184 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6185 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6186 */ 6187 while (k < n) { 6188 row = i[k]; 6189 /* Entries in [k,s) are in one row.
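(They all share the row index 'row' just read from i[k].)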
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6190 for (s = k; s < n; s++) 6191 if (i[s] != row) break; 6192 6193 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6194 for (p = k; p < s; p++) { 6195 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6196 } 6197 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6198 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6199 rowBegin[row - rstart] = k; 6200 rowMid[row - rstart] = mid; 6201 rowEnd[row - rstart] = s; 6202 PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N); 6203 6204 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6205 Atot += mid - k; 6206 Btot += s - mid; 6207 6208 /* Count unique nonzeros of this diag row */ 6209 for (p = k; p < mid;) { 6210 col = j[p]; 6211 do { 6212 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6213 p++; 6214 } while (p < mid && j[p] == col); 6215 Annz++; 6216 } 6217 6218 /* Count unique nonzeros of this offdiag row */ 6219 for (p = mid; p < s;) { 6220 col = j[p]; 6221 do { 6222 p++; 6223 } while (p < s && j[p] == col); 6224 Bnnz++; 6225 } 6226 k = s; 6227 } 6228 6229 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6230 PetscCall(PetscMalloc1(Atot, &Aperm)); 6231 PetscCall(PetscMalloc1(Btot, &Bperm)); 6232 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6233 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6234 6235 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6236 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6237 for (r = 0; r < m; r++) { 6238 k = rowBegin[r]; 6239 mid = rowMid[r]; 6240 s = rowEnd[r]; 6241 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6242 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6243 Atot += mid - k; 6244 Btot += s - mid; 6245 6246 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6247 for (p = k; p < mid;) { 6248 col = j[p]; 6249 q = p; 6250 do { 6251 p++; 6252 } while (p < mid && j[p] == col); 6253 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6254 Annz++; 6255 } 6256 6257 for (p = mid; p < s;) { 6258 col = j[p]; 6259 q = p; 6260 do { 6261 p++; 6262 } while (p < s && j[p] == col); 6263 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6264 Bnnz++; 6265 } 6266 } 6267 /* Output */ 6268 *Aperm_ = Aperm; 6269 *Annz_ = Annz; 6270 *Atot_ = Atot; 6271 *Ajmap_ = Ajmap; 6272 *Bperm_ = Bperm; 6273 *Bnnz_ = Bnnz; 6274 *Btot_ = Btot; 6275 *Bjmap_ = Bjmap; 6276 PetscFunctionReturn(PETSC_SUCCESS); 6277 } 6278 6279 /* 6280 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6281 6282 Input Parameters: 6283 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6284 nnz: number of unique nonzeros in the merged matrix 6285 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6286 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6287 6288 Output Parameter: (memory is allocated by the caller) 6289 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6290 6291 Example: 6292 nnz1 = 4 6293 nnz 
= 6 6294 imap = [1,3,4,5] 6295 jmap = [0,3,5,6,7] 6296 then, 6297 jmap_new = [0,0,3,3,5,6,7] 6298 */ 6299 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6300 { 6301 PetscCount k, p; 6302 6303 PetscFunctionBegin; 6304 jmap_new[0] = 0; 6305 p = nnz; /* p loops over jmap_new[] backwards */ 6306 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6307 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6308 } 6309 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6310 PetscFunctionReturn(PETSC_SUCCESS); 6311 } 6312 6313 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6314 { 6315 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6316 6317 PetscFunctionBegin; 6318 PetscCall(PetscSFDestroy(&coo->sf)); 6319 PetscCall(PetscFree(coo->Aperm1)); 6320 PetscCall(PetscFree(coo->Bperm1)); 6321 PetscCall(PetscFree(coo->Ajmap1)); 6322 PetscCall(PetscFree(coo->Bjmap1)); 6323 PetscCall(PetscFree(coo->Aimap2)); 6324 PetscCall(PetscFree(coo->Bimap2)); 6325 PetscCall(PetscFree(coo->Aperm2)); 6326 PetscCall(PetscFree(coo->Bperm2)); 6327 PetscCall(PetscFree(coo->Ajmap2)); 6328 PetscCall(PetscFree(coo->Bjmap2)); 6329 PetscCall(PetscFree(coo->Cperm1)); 6330 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6331 PetscCall(PetscFree(coo)); 6332 PetscFunctionReturn(PETSC_SUCCESS); 6333 } 6334 6335 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6336 { 6337 MPI_Comm comm; 6338 PetscMPIInt rank, size; 6339 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6340 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6341 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6342 PetscContainer container; 6343 MatCOOStruct_MPIAIJ *coo; 6344 6345 PetscFunctionBegin; 6346 PetscCall(PetscFree(mpiaij->garray)); 6347 PetscCall(VecDestroy(&mpiaij->lvec)); 6348 #if defined(PETSC_USE_CTABLE) 6349 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6350 #else 6351 PetscCall(PetscFree(mpiaij->colmap)); 6352 #endif 6353 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6354 mat->assembled = PETSC_FALSE; 6355 mat->was_assembled = PETSC_FALSE; 6356 6357 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6358 PetscCallMPI(MPI_Comm_size(comm, &size)); 6359 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6360 PetscCall(PetscLayoutSetUp(mat->rmap)); 6361 PetscCall(PetscLayoutSetUp(mat->cmap)); 6362 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6363 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6364 PetscCall(MatGetLocalSize(mat, &m, &n)); 6365 PetscCall(MatGetSize(mat, &M, &N)); 6366 6367 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6368 /* entries come first, then local rows, then remote rows. */ 6369 PetscCount n1 = coo_n, *perm1; 6370 PetscInt *i1 = coo_i, *j1 = coo_j; 6371 6372 PetscCall(PetscMalloc1(n1, &perm1)); 6373 for (k = 0; k < n1; k++) perm1[k] = k; 6374 6375 /* Manipulate indices so that entries with negative row or col indices will have smallest 6376 row indices, local entries will have greater but negative row indices, and remote entries 6377 will have positive row indices. 
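   (Illustrative example: with rstart = 100, a local row 105 temporarily becomes 105 - PETSC_INT_MAX, which is negative yet still greater than PETSC_INT_MIN, so after sorting the order is: ignored entries, then local rows, then remote rows.)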
6378 */ 6379 for (k = 0; k < n1; k++) { 6380 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6381 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6382 else { 6383 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6384 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6385 } 6386 } 6387 6388 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6389 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6390 6391 /* Advance k to the first entry we need to take care of */ 6392 for (k = 0; k < n1; k++) 6393 if (i1[k] > PETSC_INT_MIN) break; 6394 PetscCount i1start = k; 6395 6396 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6397 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6398 6399 PetscCheck(n1 == 0 || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M); 6400 6401 /* Send remote rows to their owner */ 6402 /* Find which rows should be sent to which remote ranks*/ 6403 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6404 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6405 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6406 const PetscInt *ranges; 6407 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6408 6409 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6410 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6411 for (k = rem; k < n1;) { 6412 PetscMPIInt owner; 6413 PetscInt firstRow, lastRow; 6414 6415 /* Locate a row range */ 6416 firstRow = i1[k]; /* first row of this owner */ 6417 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6418 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6419 6420 /* Find the first index 'p' in [k,n) with i1[p] belonging to next owner */ 6421 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6422 6423 /* All entries in [k,p) belong to this remote owner */ 6424 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6425 PetscMPIInt *sendto2; 6426 PetscInt *nentries2; 6427 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6428 6429 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6430 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6431 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6432 PetscCall(PetscFree2(sendto, nentries)); 6433 sendto = sendto2; 6434 nentries = nentries2; 6435 maxNsend = maxNsend2; 6436 } 6437 sendto[nsend] = owner; 6438 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6439 nsend++; 6440 k = p; 6441 } 6442 6443 /* Build 1st SF to know offsets on remote to send data */ 6444 PetscSF sf1; 6445 PetscInt nroots = 1, nroots2 = 0; 6446 PetscInt nleaves = nsend, nleaves2 = 0; 6447 PetscInt *offsets; 6448 PetscSFNode *iremote; 6449 6450 PetscCall(PetscSFCreate(comm, &sf1)); 6451 PetscCall(PetscMalloc1(nsend, &iremote)); 6452 PetscCall(PetscMalloc1(nsend, &offsets)); 6453 for (k = 0; k < nsend; k++) { 6454 iremote[k].rank = sendto[k]; 6455 iremote[k].index = 0; 6456 nleaves2 += nentries[k]; 6457 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6458 } 6459 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6460 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6461 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, the checks on offsets[] below would catch it */ 6462 PetscCall(PetscSFDestroy(&sf1)); 6463 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6464 6465 /* Build 2nd SF to send remote COOs to their owner */ 6466 PetscSF sf2; 6467 nroots = nroots2; 6468 nleaves = nleaves2; 6469 PetscCall(PetscSFCreate(comm, &sf2)); 6470 PetscCall(PetscSFSetFromOptions(sf2)); 6471 PetscCall(PetscMalloc1(nleaves, &iremote)); 6472 p = 0; 6473 for (k = 0; k < nsend; k++) { 6474 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6475 for (q = 0; q < nentries[k]; q++, p++) { 6476 iremote[p].rank = sendto[k]; 6477 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6478 } 6479 } 6480 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6481 6482 /* Send the remote COOs to their owner */ 6483 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6484 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6485 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6486 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6487 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6488 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6489 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6490 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6491 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6492 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6493 PetscCall(PetscSFReduceEnd(sf2,
MPIU_INT, j1prem, j2, MPI_REPLACE)); 6494 6495 PetscCall(PetscFree(offsets)); 6496 PetscCall(PetscFree2(sendto, nentries)); 6497 6498 /* Sort received COOs by row along with the permutation array */ 6499 for (k = 0; k < n2; k++) perm2[k] = k; 6500 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6501 6502 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6503 PetscCount *Cperm1; 6504 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6505 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6506 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6507 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6508 6509 /* Support for HYPRE matrices, kind of a hack. 6510 Swap min column with diagonal so that diagonal values will go first */ 6511 PetscBool hypre; 6512 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6513 if (hypre) { 6514 PetscInt *minj; 6515 PetscBT hasdiag; 6516 6517 PetscCall(PetscBTCreate(m, &hasdiag)); 6518 PetscCall(PetscMalloc1(m, &minj)); 6519 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6520 for (k = i1start; k < rem; k++) { 6521 if (j1[k] < cstart || j1[k] >= cend) continue; 6522 const PetscInt rindex = i1[k] - rstart; 6523 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6524 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6525 } 6526 for (k = 0; k < n2; k++) { 6527 if (j2[k] < cstart || j2[k] >= cend) continue; 6528 const PetscInt rindex = i2[k] - rstart; 6529 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6530 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6531 } 6532 for (k = i1start; k < rem; k++) { 6533 const PetscInt rindex = i1[k] - rstart; 6534 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6535 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6536 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6537 } 6538 for (k = 0; k < n2; k++) { 6539 const PetscInt rindex = i2[k] - rstart; 6540 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6541 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6542 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6543 } 6544 PetscCall(PetscBTDestroy(&hasdiag)); 6545 PetscCall(PetscFree(minj)); 6546 } 6547 6548 /* Split local COOs and received COOs into diag/offdiag portions */ 6549 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6550 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6551 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6552 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6553 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6554 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6555 6556 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6557 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6558 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6559 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6560 6561 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6562 PetscInt *Ai, *Bi; 6563 PetscInt *Aj, *Bj; 6564 6565 PetscCall(PetscMalloc1(m + 1, &Ai)); 6566 PetscCall(PetscMalloc1(m + 1, &Bi)); 6567 PetscCall(PetscMalloc1(Annz1 + 
Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6568 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6569 6570 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6571 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6572 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6573 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6574 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6575 6576 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6577 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6578 6579 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6580 /* expect nonzeros in A/B most likely have local contributing entries */ 6581 PetscInt Annz = Ai[m]; 6582 PetscInt Bnnz = Bi[m]; 6583 PetscCount *Ajmap1_new, *Bjmap1_new; 6584 6585 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6586 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6587 6588 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6589 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6590 6591 PetscCall(PetscFree(Aimap1)); 6592 PetscCall(PetscFree(Ajmap1)); 6593 PetscCall(PetscFree(Bimap1)); 6594 PetscCall(PetscFree(Bjmap1)); 6595 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6596 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6597 PetscCall(PetscFree(perm1)); 6598 PetscCall(PetscFree3(i2, j2, perm2)); 6599 6600 Ajmap1 = Ajmap1_new; 6601 Bjmap1 = Bjmap1_new; 6602 6603 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6604 if (Annz < Annz1 + Annz2) { 6605 PetscInt *Aj_new; 6606 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6607 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6608 PetscCall(PetscFree(Aj)); 6609 Aj = Aj_new; 6610 } 6611 6612 if (Bnnz < Bnnz1 + Bnnz2) { 6613 PetscInt *Bj_new; 6614 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6615 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6616 PetscCall(PetscFree(Bj)); 6617 Bj = Bj_new; 6618 } 6619 6620 /* Create new submatrices for on-process and off-process coupling */ 6621 PetscScalar *Aa, *Ba; 6622 MatType rtype; 6623 Mat_SeqAIJ *a, *b; 6624 PetscObjectState state; 6625 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6626 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6627 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6628 if (cstart) { 6629 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6630 } 6631 6632 PetscCall(MatGetRootType_Private(mat, &rtype)); 6633 6634 MatSeqXAIJGetOptions_Private(mpiaij->A); 6635 PetscCall(MatDestroy(&mpiaij->A)); 6636 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6637 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6638 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6639 6640 MatSeqXAIJGetOptions_Private(mpiaij->B); 6641 PetscCall(MatDestroy(&mpiaij->B)); 6642 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6643 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6644 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6645 6646 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6647 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6648 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6649 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, 
PetscObjectComm((PetscObject)mat))); 6650 6651 a = (Mat_SeqAIJ *)mpiaij->A->data; 6652 b = (Mat_SeqAIJ *)mpiaij->B->data; 6653 a->free_a = PETSC_TRUE; 6654 a->free_ij = PETSC_TRUE; 6655 b->free_a = PETSC_TRUE; 6656 b->free_ij = PETSC_TRUE; 6657 a->maxnz = a->nz; 6658 b->maxnz = b->nz; 6659 6660 /* conversion must happen AFTER multiply setup */ 6661 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6662 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6663 PetscCall(VecDestroy(&mpiaij->lvec)); 6664 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6665 6666 // Put the COO struct in a container and then attach that to the matrix 6667 PetscCall(PetscMalloc1(1, &coo)); 6668 coo->n = coo_n; 6669 coo->sf = sf2; 6670 coo->sendlen = nleaves; 6671 coo->recvlen = nroots; 6672 coo->Annz = Annz; 6673 coo->Bnnz = Bnnz; 6674 coo->Annz2 = Annz2; 6675 coo->Bnnz2 = Bnnz2; 6676 coo->Atot1 = Atot1; 6677 coo->Atot2 = Atot2; 6678 coo->Btot1 = Btot1; 6679 coo->Btot2 = Btot2; 6680 coo->Ajmap1 = Ajmap1; 6681 coo->Aperm1 = Aperm1; 6682 coo->Bjmap1 = Bjmap1; 6683 coo->Bperm1 = Bperm1; 6684 coo->Aimap2 = Aimap2; 6685 coo->Ajmap2 = Ajmap2; 6686 coo->Aperm2 = Aperm2; 6687 coo->Bimap2 = Bimap2; 6688 coo->Bjmap2 = Bjmap2; 6689 coo->Bperm2 = Bperm2; 6690 coo->Cperm1 = Cperm1; 6691 // Allocate in preallocation. If not used, it has zero cost on host 6692 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6693 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6694 PetscCall(PetscContainerSetPointer(container, coo)); 6695 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6696 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6697 PetscCall(PetscContainerDestroy(&container)); 6698 PetscFunctionReturn(PETSC_SUCCESS); 6699 } 6700 6701 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6702 { 6703 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6704 Mat A = mpiaij->A, B = mpiaij->B; 6705 PetscScalar *Aa, *Ba; 6706 PetscScalar *sendbuf, *recvbuf; 6707 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6708 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6709 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6710 const PetscCount *Cperm1; 6711 PetscContainer container; 6712 MatCOOStruct_MPIAIJ *coo; 6713 6714 PetscFunctionBegin; 6715 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6716 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6717 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6718 sendbuf = coo->sendbuf; 6719 recvbuf = coo->recvbuf; 6720 Ajmap1 = coo->Ajmap1; 6721 Ajmap2 = coo->Ajmap2; 6722 Aimap2 = coo->Aimap2; 6723 Bjmap1 = coo->Bjmap1; 6724 Bjmap2 = coo->Bjmap2; 6725 Bimap2 = coo->Bimap2; 6726 Aperm1 = coo->Aperm1; 6727 Aperm2 = coo->Aperm2; 6728 Bperm1 = coo->Bperm1; 6729 Bperm2 = coo->Bperm2; 6730 Cperm1 = coo->Cperm1; 6731 6732 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6733 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6734 6735 /* Pack entries to be sent to remote */ 6736 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6737 6738 /* Send remote entries to their owner and overlap the communication with local computation */ 6739 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, 
PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6740 /* Add local entries to A and B */ 6741 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6742 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6743 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6744 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6745 } 6746 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6747 PetscScalar sum = 0.0; 6748 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6749 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6750 } 6751 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6752 6753 /* Add received remote entries to A and B */ 6754 for (PetscCount i = 0; i < coo->Annz2; i++) { 6755 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6756 } 6757 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6758 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6759 } 6760 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6761 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6762 PetscFunctionReturn(PETSC_SUCCESS); 6763 } 6764 6765 /*MC 6766 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6767 6768 Options Database Keys: 6769 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6770 6771 Level: beginner 6772 6773 Notes: 6774 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6775 in this case the values associated with the rows and columns one passes in are set to zero 6776 in the matrix 6777 6778 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 6779 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6780 6781 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6782 M*/ 6783 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6784 { 6785 Mat_MPIAIJ *b; 6786 PetscMPIInt size; 6787 6788 PetscFunctionBegin; 6789 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6790 6791 PetscCall(PetscNew(&b)); 6792 B->data = (void *)b; 6793 B->ops[0] = MatOps_Values; 6794 B->assembled = PETSC_FALSE; 6795 B->insertmode = NOT_SET_VALUES; 6796 b->size = size; 6797 6798 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6799 6800 /* build cache for off array entries formed */ 6801 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6802 6803 b->donotstash = PETSC_FALSE; 6804 b->colmap = NULL; 6805 b->garray = NULL; 6806 b->roworiented = PETSC_TRUE; 6807 6808 /* stuff used for matrix vector multiply */ 6809 b->lvec = NULL; 6810 b->Mvctx = NULL; 6811 6812 /* stuff for MatGetRow() */ 6813 b->rowindices = NULL; 6814 b->rowvalues = NULL; 6815 b->getrowactive = PETSC_FALSE; 6816 6817 /* flexible pointer used in CUSPARSE classes */ 6818 b->spptr = NULL; 6819 6820 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6821 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6822 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6823 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6824 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6825 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6826 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6827 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6828 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6829 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6830 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6831 #if defined(PETSC_HAVE_CUDA) 6832 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6833 #endif 6834 #if defined(PETSC_HAVE_HIP) 6835 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6836 #endif 6837 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6838 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6839 #endif 6840 #if defined(PETSC_HAVE_MKL_SPARSE) 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6842 #endif 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6844 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6845 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6847 #if defined(PETSC_HAVE_ELEMENTAL) 6848 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6849 #endif 6850 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) 6851 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6852 #endif 6853 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6854 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6855 #if defined(PETSC_HAVE_HYPRE) 6856 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6857 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6858 #endif 6859 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6860 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6861 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6862 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6863 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6864 PetscFunctionReturn(PETSC_SUCCESS); 6865 } 6866 6867 /*@ 6868 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6869 and "off-diagonal" part of the matrix in CSR format. 6870 6871 Collective 6872 6873 Input Parameters: 6874 + comm - MPI communicator 6875 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6876 . n - This value should be the same as the local size used in creating the 6877 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6878 calculated if `N` is given) For square matrices `n` is almost always `m`. 6879 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6880 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6881 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6882 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6883 . a - matrix values 6884 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6885 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6886 - oa - matrix values 6887 6888 Output Parameter: 6889 . mat - the matrix 6890 6891 Level: advanced 6892 6893 Notes: 6894 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6895 must free the arrays once the matrix has been destroyed and not before. 
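   As an illustrative sketch (the values are invented for this example), the first rank of a two-rank 4 x 4 matrix, owning rows 0-1 and columns 0-1 and holding the entries (0,0)=1, (0,1)=2, (0,3)=3, (1,1)=4, (1,2)=5, would pass the "diagonal" arrays i = {0,2,3}, j = {0,1,1}, a = {1,2,4} (local column indices) and the "off-diagonal" arrays oi = {0,1,2}, oj = {3,2}, oa = {3,5} (global column indices).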
6896 6897 The `i` and `j` indices are 0 based 6898 6899 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6900 6901 This sets local rows and cannot be used to set off-processor values. 6902 6903 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6904 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6905 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6906 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6907 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6908 communication if it is known that only local entries will be set. 6909 6910 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6911 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6912 @*/ 6913 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6914 { 6915 Mat_MPIAIJ *maij; 6916 6917 PetscFunctionBegin; 6918 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6919 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6920 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6921 PetscCall(MatCreate(comm, mat)); 6922 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6923 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6924 maij = (Mat_MPIAIJ *)(*mat)->data; 6925 6926 (*mat)->preallocated = PETSC_TRUE; 6927 6928 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6929 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6930 6931 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6932 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6933 6934 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6935 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6936 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6937 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6938 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6939 PetscFunctionReturn(PETSC_SUCCESS); 6940 } 6941 6942 typedef struct { 6943 Mat *mp; /* intermediate products */ 6944 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6945 PetscInt cp; /* number of intermediate products */ 6946 6947 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6948 PetscInt *startsj_s, *startsj_r; 6949 PetscScalar *bufa; 6950 Mat P_oth; 6951 6952 /* may take advantage of merging product->B */ 6953 Mat Bloc; /* B-local by merging diag and off-diag */ 6954 6955 /* cusparse does not have support to split between symbolic and numeric phases. 
6956 When api_user is true, we don't need to update the numerical values 6957 of the temporary storage */ 6958 PetscBool reusesym; 6959 6960 /* support for COO values insertion */ 6961 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6962 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6963 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6964 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6965 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6966 PetscMemType mtype; 6967 6968 /* customization */ 6969 PetscBool abmerge; 6970 PetscBool P_oth_bind; 6971 } MatMatMPIAIJBACKEND; 6972 6973 static PetscErrorCode MatProductCtxDestroy_MatMatMPIAIJBACKEND(void **data) 6974 { 6975 MatMatMPIAIJBACKEND *mmdata = *(MatMatMPIAIJBACKEND **)data; 6976 PetscInt i; 6977 6978 PetscFunctionBegin; 6979 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6980 PetscCall(PetscFree(mmdata->bufa)); 6981 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6982 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6983 PetscCall(MatDestroy(&mmdata->P_oth)); 6984 PetscCall(MatDestroy(&mmdata->Bloc)); 6985 PetscCall(PetscSFDestroy(&mmdata->sf)); 6986 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 6987 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 6988 PetscCall(PetscFree(mmdata->own[0])); 6989 PetscCall(PetscFree(mmdata->own)); 6990 PetscCall(PetscFree(mmdata->off[0])); 6991 PetscCall(PetscFree(mmdata->off)); 6992 PetscCall(PetscFree(mmdata)); 6993 PetscFunctionReturn(PETSC_SUCCESS); 6994 } 6995 6996 /* Copy selected n entries with indices in idx[] of A to v[]. 
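   Here A is one of the intermediate sequential products mp[], and idx[] typically comes from mmdata->own[] or mmdata->off[] built during MatProductSymbolic_MPIAIJBACKEND().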
6997 If idx is NULL, copy the whole data array of A to v[] 6998 */ 6999 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7000 { 7001 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7002 7003 PetscFunctionBegin; 7004 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7005 if (f) { 7006 PetscCall((*f)(A, n, idx, v)); 7007 } else { 7008 const PetscScalar *vv; 7009 7010 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7011 if (n && idx) { 7012 PetscScalar *w = v; 7013 const PetscInt *oi = idx; 7014 PetscInt j; 7015 7016 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7017 } else { 7018 PetscCall(PetscArraycpy(v, vv, n)); 7019 } 7020 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7021 } 7022 PetscFunctionReturn(PETSC_SUCCESS); 7023 } 7024 7025 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7026 { 7027 MatMatMPIAIJBACKEND *mmdata; 7028 PetscInt i, n_d, n_o; 7029 7030 PetscFunctionBegin; 7031 MatCheckProduct(C, 1); 7032 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7033 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7034 if (!mmdata->reusesym) { /* update temporary matrices */ 7035 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7036 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7037 } 7038 mmdata->reusesym = PETSC_FALSE; 7039 7040 for (i = 0; i < mmdata->cp; i++) { 7041 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7042 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7043 } 7044 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7045 PetscInt noff; 7046 7047 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7048 if (mmdata->mptmp[i]) continue; 7049 if (noff) { 7050 PetscInt nown; 7051 7052 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7053 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7054 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7055 n_o += noff; 7056 n_d += nown; 7057 } else { 7058 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7059 7060 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7061 n_d += mm->nz; 7062 } 7063 } 7064 if (mmdata->hasoffproc) { /* offprocess insertion */ 7065 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7066 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7067 } 7068 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7069 PetscFunctionReturn(PETSC_SUCCESS); 7070 } 7071 7072 /* Support for Pt * A, A * P, or Pt * A * P */ 7073 #define MAX_NUMBER_INTERMEDIATE 4 7074 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7075 { 7076 Mat_Product *product = C->product; 7077 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7078 Mat_MPIAIJ *a, *p; 7079 MatMatMPIAIJBACKEND *mmdata; 7080 ISLocalToGlobalMapping P_oth_l2g = NULL; 7081 IS glob = NULL; 7082 const char *prefix; 7083 char pprefix[256]; 7084 const PetscInt *globidx, *P_oth_idx; 7085 PetscInt i, j, cp, m, 
n, M, N, *coo_i, *coo_j; 7086 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7087 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7088 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7089 /* a base offset; type-2: sparse with a local to global map table */ 7090 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7091 7092 MatProductType ptype; 7093 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7094 PetscMPIInt size; 7095 7096 PetscFunctionBegin; 7097 MatCheckProduct(C, 1); 7098 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7099 ptype = product->type; 7100 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7101 ptype = MATPRODUCT_AB; 7102 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7103 } 7104 switch (ptype) { 7105 case MATPRODUCT_AB: 7106 A = product->A; 7107 P = product->B; 7108 m = A->rmap->n; 7109 n = P->cmap->n; 7110 M = A->rmap->N; 7111 N = P->cmap->N; 7112 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7113 break; 7114 case MATPRODUCT_AtB: 7115 P = product->A; 7116 A = product->B; 7117 m = P->cmap->n; 7118 n = A->cmap->n; 7119 M = P->cmap->N; 7120 N = A->cmap->N; 7121 hasoffproc = PETSC_TRUE; 7122 break; 7123 case MATPRODUCT_PtAP: 7124 A = product->A; 7125 P = product->B; 7126 m = P->cmap->n; 7127 n = P->cmap->n; 7128 M = P->cmap->N; 7129 N = P->cmap->N; 7130 hasoffproc = PETSC_TRUE; 7131 break; 7132 default: 7133 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7134 } 7135 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7136 if (size == 1) hasoffproc = PETSC_FALSE; 7137 7138 /* defaults */ 7139 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7140 mp[i] = NULL; 7141 mptmp[i] = PETSC_FALSE; 7142 rmapt[i] = -1; 7143 cmapt[i] = -1; 7144 rmapa[i] = NULL; 7145 cmapa[i] = NULL; 7146 } 7147 7148 /* customization */ 7149 PetscCall(PetscNew(&mmdata)); 7150 mmdata->reusesym = product->api_user; 7151 if (ptype == MATPRODUCT_AB) { 7152 if (product->api_user) { 7153 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7154 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7155 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7156 PetscOptionsEnd(); 7157 } else { 7158 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7159 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7160 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7161 PetscOptionsEnd(); 7162 } 7163 } else if (ptype == MATPRODUCT_PtAP) { 7164 if (product->api_user) { 7165 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7166 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, 
&mmdata->P_oth_bind, NULL)); 7167 PetscOptionsEnd(); 7168 } else { 7169 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7170 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7171 PetscOptionsEnd(); 7172 } 7173 } 7174 a = (Mat_MPIAIJ *)A->data; 7175 p = (Mat_MPIAIJ *)P->data; 7176 PetscCall(MatSetSizes(C, m, n, M, N)); 7177 PetscCall(PetscLayoutSetUp(C->rmap)); 7178 PetscCall(PetscLayoutSetUp(C->cmap)); 7179 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7180 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7181 7182 cp = 0; 7183 switch (ptype) { 7184 case MATPRODUCT_AB: /* A * P */ 7185 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7186 7187 /* A_diag * P_local (merged or not) */ 7188 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7189 /* P is product->B */ 7190 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7191 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7192 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7193 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7194 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7195 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7196 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7197 mp[cp]->product->api_user = product->api_user; 7198 PetscCall(MatProductSetFromOptions(mp[cp])); 7199 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7200 PetscCall(ISGetIndices(glob, &globidx)); 7201 rmapt[cp] = 1; 7202 cmapt[cp] = 2; 7203 cmapa[cp] = globidx; 7204 mptmp[cp] = PETSC_FALSE; 7205 cp++; 7206 } else { /* A_diag * P_diag and A_diag * P_off */ 7207 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7208 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7209 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7210 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7211 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7212 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7213 mp[cp]->product->api_user = product->api_user; 7214 PetscCall(MatProductSetFromOptions(mp[cp])); 7215 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7216 rmapt[cp] = 1; 7217 cmapt[cp] = 1; 7218 mptmp[cp] = PETSC_FALSE; 7219 cp++; 7220 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7221 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7222 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7223 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7224 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7225 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7226 mp[cp]->product->api_user = product->api_user; 7227 PetscCall(MatProductSetFromOptions(mp[cp])); 7228 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7229 rmapt[cp] = 1; 7230 cmapt[cp] = 2; 7231 cmapa[cp] = p->garray; 7232 mptmp[cp] = PETSC_FALSE; 7233 cp++; 7234 } 7235 7236 /* A_off * P_other */ 7237 if (mmdata->P_oth) { 7238 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7239 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7240 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7241 
PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7242 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7243 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7244 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7245 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7246 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7247 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7248 mp[cp]->product->api_user = product->api_user; 7249 PetscCall(MatProductSetFromOptions(mp[cp])); 7250 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7251 rmapt[cp] = 1; 7252 cmapt[cp] = 2; 7253 cmapa[cp] = P_oth_idx; 7254 mptmp[cp] = PETSC_FALSE; 7255 cp++; 7256 } 7257 break; 7258 7259 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7260 /* A is product->B */ 7261 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7262 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7263 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7264 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7265 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7266 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7267 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7268 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7269 mp[cp]->product->api_user = product->api_user; 7270 PetscCall(MatProductSetFromOptions(mp[cp])); 7271 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7272 PetscCall(ISGetIndices(glob, &globidx)); 7273 rmapt[cp] = 2; 7274 rmapa[cp] = globidx; 7275 cmapt[cp] = 2; 7276 cmapa[cp] = globidx; 7277 mptmp[cp] = PETSC_FALSE; 7278 cp++; 7279 } else { 7280 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7281 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7282 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7283 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7284 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7285 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7286 mp[cp]->product->api_user = product->api_user; 7287 PetscCall(MatProductSetFromOptions(mp[cp])); 7288 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7289 PetscCall(ISGetIndices(glob, &globidx)); 7290 rmapt[cp] = 1; 7291 cmapt[cp] = 2; 7292 cmapa[cp] = globidx; 7293 mptmp[cp] = PETSC_FALSE; 7294 cp++; 7295 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7296 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7297 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7298 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7299 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7300 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7301 mp[cp]->product->api_user = product->api_user; 7302 PetscCall(MatProductSetFromOptions(mp[cp])); 7303 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7304 rmapt[cp] = 2; 7305 rmapa[cp] = p->garray; 7306 cmapt[cp] = 2; 7307 cmapa[cp] = globidx; 7308 mptmp[cp] = PETSC_FALSE; 7309 cp++; 7310 } 7311 break; 7312 case MATPRODUCT_PtAP: 7313 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7314 /* P is product->B */ 7315 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7316 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7317 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7318 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7319 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7320 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7321 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7322 mp[cp]->product->api_user = product->api_user; 7323 PetscCall(MatProductSetFromOptions(mp[cp])); 7324 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7325 PetscCall(ISGetIndices(glob, &globidx)); 7326 rmapt[cp] = 2; 7327 rmapa[cp] = globidx; 7328 cmapt[cp] = 2; 7329 cmapa[cp] = globidx; 7330 mptmp[cp] = PETSC_FALSE; 7331 cp++; 7332 if (mmdata->P_oth) { 7333 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7334 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7335 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7336 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7337 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7338 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7339 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7340 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7341 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7342 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7343 mp[cp]->product->api_user = product->api_user; 7344 PetscCall(MatProductSetFromOptions(mp[cp])); 7345 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7346 mptmp[cp] = PETSC_TRUE; 7347 cp++; 7348 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7349 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7350 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7351 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7352 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7353 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7354 mp[cp]->product->api_user = product->api_user; 7355 PetscCall(MatProductSetFromOptions(mp[cp])); 7356 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7357 rmapt[cp] = 2; 7358 rmapa[cp] = globidx; 7359 cmapt[cp] = 2; 7360 cmapa[cp] = P_oth_idx; 7361 mptmp[cp] = PETSC_FALSE; 7362 cp++; 7363 } 7364 break; 7365 default: 7366 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7367 } 7368 /* sanity check */ 7369 if (size > 1) 7370 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7371 7372 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7373 for (i = 0; i < cp; i++) { 7374 mmdata->mp[i] = mp[i]; 7375 mmdata->mptmp[i] = mptmp[i]; 7376 } 7377 mmdata->cp = cp; 7378 C->product->data = mmdata; 7379 C->product->destroy = MatProductCtxDestroy_MatMatMPIAIJBACKEND; 7380 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7381 7382 /* memory type */ 7383 mmdata->mtype = PETSC_MEMTYPE_HOST; 7384 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7385 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7386 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7387 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7388 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7389 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7390 7391 /* prepare 
coo coordinates for values insertion */
7392
7393 /* count total nonzeros of those intermediate seqaij Mats
7394 ncoo_d: # of nonzeros of matrices that do not have offproc entries
7395 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7396 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7397 */
7398 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7399 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7400 if (mptmp[cp]) continue;
7401 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7402 const PetscInt *rmap = rmapa[cp];
7403 const PetscInt mr = mp[cp]->rmap->n;
7404 const PetscInt rs = C->rmap->rstart;
7405 const PetscInt re = C->rmap->rend;
7406 const PetscInt *ii = mm->i;
7407 for (i = 0; i < mr; i++) {
7408 const PetscInt gr = rmap[i];
7409 const PetscInt nz = ii[i + 1] - ii[i];
7410 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7411 else ncoo_oown += nz; /* this row is local */
7412 }
7413 } else ncoo_d += mm->nz;
7414 }
7415
7416 /*
7417 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7418
7419 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7420
7421 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7422
7423 off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert to others
7424 own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
7425 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7426
7427 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7428 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
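       For instance (made-up sizes, for illustration only): with ncoo_d = 5, ncoo_oown = 2 and ncoo2 = 3, entries 0..6 of coo_i/j[] are filled by the local loops below, while entries 7..9 are filled by the PetscSFGather calls (and the matching entries of coo_v[] by those in MatProductNumeric_MPIAIJBACKEND()).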
7429 */ 7430 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7431 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7432 7433 /* gather (i,j) of nonzeros inserted by remote procs */ 7434 if (hasoffproc) { 7435 PetscSF msf; 7436 PetscInt ncoo2, *coo_i2, *coo_j2; 7437 7438 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7439 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7440 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7441 7442 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7443 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7444 PetscInt *idxoff = mmdata->off[cp]; 7445 PetscInt *idxown = mmdata->own[cp]; 7446 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7447 const PetscInt *rmap = rmapa[cp]; 7448 const PetscInt *cmap = cmapa[cp]; 7449 const PetscInt *ii = mm->i; 7450 PetscInt *coi = coo_i + ncoo_o; 7451 PetscInt *coj = coo_j + ncoo_o; 7452 const PetscInt mr = mp[cp]->rmap->n; 7453 const PetscInt rs = C->rmap->rstart; 7454 const PetscInt re = C->rmap->rend; 7455 const PetscInt cs = C->cmap->rstart; 7456 for (i = 0; i < mr; i++) { 7457 const PetscInt *jj = mm->j + ii[i]; 7458 const PetscInt gr = rmap[i]; 7459 const PetscInt nz = ii[i + 1] - ii[i]; 7460 if (gr < rs || gr >= re) { /* this is an offproc row */ 7461 for (j = ii[i]; j < ii[i + 1]; j++) { 7462 *coi++ = gr; 7463 *idxoff++ = j; 7464 } 7465 if (!cmapt[cp]) { /* already global */ 7466 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7467 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7468 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7469 } else { /* offdiag */ 7470 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7471 } 7472 ncoo_o += nz; 7473 } else { /* this is a local row */ 7474 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7475 } 7476 } 7477 } 7478 mmdata->off[cp + 1] = idxoff; 7479 mmdata->own[cp + 1] = idxown; 7480 } 7481 7482 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7483 PetscInt incoo_o; 7484 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7485 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7486 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7487 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7488 ncoo = ncoo_d + ncoo_oown + ncoo2; 7489 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7490 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7491 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7492 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7493 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7494 PetscCall(PetscFree2(coo_i, coo_j)); 7495 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7496 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7497 coo_i = coo_i2; 7498 coo_j = coo_j2; 7499 } else { /* no offproc values insertion */ 7500 ncoo = ncoo_d; 7501 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7502 7503 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7504 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7505 PetscCall(PetscSFSetUp(mmdata->sf)); 7506 } 7507 
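  /* remember whether off-process insertion is needed: when it is, MatProductNumeric_MPIAIJBACKEND() gathers the coo_w send buffer into the tail of coo_v before calling MatSetValuesCOO() */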
mmdata->hasoffproc = hasoffproc; 7508 7509 /* gather (i,j) of nonzeros inserted locally */ 7510 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7511 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7512 PetscInt *coi = coo_i + ncoo_d; 7513 PetscInt *coj = coo_j + ncoo_d; 7514 const PetscInt *jj = mm->j; 7515 const PetscInt *ii = mm->i; 7516 const PetscInt *cmap = cmapa[cp]; 7517 const PetscInt *rmap = rmapa[cp]; 7518 const PetscInt mr = mp[cp]->rmap->n; 7519 const PetscInt rs = C->rmap->rstart; 7520 const PetscInt re = C->rmap->rend; 7521 const PetscInt cs = C->cmap->rstart; 7522 7523 if (mptmp[cp]) continue; 7524 if (rmapt[cp] == 1) { /* consecutive rows */ 7525 /* fill coo_i */ 7526 for (i = 0; i < mr; i++) { 7527 const PetscInt gr = i + rs; 7528 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7529 } 7530 /* fill coo_j */ 7531 if (!cmapt[cp]) { /* type-0, already global */ 7532 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7533 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7534 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7535 } else { /* type-2, local to global for sparse columns */ 7536 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7537 } 7538 ncoo_d += mm->nz; 7539 } else if (rmapt[cp] == 2) { /* sparse rows */ 7540 for (i = 0; i < mr; i++) { 7541 const PetscInt *jj = mm->j + ii[i]; 7542 const PetscInt gr = rmap[i]; 7543 const PetscInt nz = ii[i + 1] - ii[i]; 7544 if (gr >= rs && gr < re) { /* local rows */ 7545 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7546 if (!cmapt[cp]) { /* type-0, already global */ 7547 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7548 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7549 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7550 } else { /* type-2, local to global for sparse columns */ 7551 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7552 } 7553 ncoo_d += nz; 7554 } 7555 } 7556 } 7557 } 7558 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7559 PetscCall(ISDestroy(&glob)); 7560 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7561 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7562 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7563 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7564 7565 /* set block sizes */ 7566 A = product->A; 7567 P = product->B; 7568 switch (ptype) { 7569 case MATPRODUCT_PtAP: 7570 PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs)); 7571 break; 7572 case MATPRODUCT_RARt: 7573 PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs)); 7574 break; 7575 case MATPRODUCT_ABC: 7576 PetscCall(MatSetBlockSizesFromMats(C, A, product->C)); 7577 break; 7578 case MATPRODUCT_AB: 7579 PetscCall(MatSetBlockSizesFromMats(C, A, P)); 7580 break; 7581 case MATPRODUCT_AtB: 7582 PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs)); 7583 break; 7584 case MATPRODUCT_ABt: 7585 PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs)); 7586 break; 7587 default: 7588 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]); 7589 } 7590 7591 /* preallocate with COO data */ 7592 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7593 PetscCall(PetscFree2(coo_i, coo_j)); 7594 PetscFunctionReturn(PETSC_SUCCESS); 7595 } 7596 7597 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7598 { 7599 Mat_Product *product = 
mat->product; 7600 #if defined(PETSC_HAVE_DEVICE) 7601 PetscBool match = PETSC_FALSE; 7602 PetscBool usecpu = PETSC_FALSE; 7603 #else 7604 PetscBool match = PETSC_TRUE; 7605 #endif 7606 7607 PetscFunctionBegin; 7608 MatCheckProduct(mat, 1); 7609 #if defined(PETSC_HAVE_DEVICE) 7610 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7611 if (match) { /* we can always fallback to the CPU if requested */ 7612 switch (product->type) { 7613 case MATPRODUCT_AB: 7614 if (product->api_user) { 7615 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7616 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7617 PetscOptionsEnd(); 7618 } else { 7619 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7620 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7621 PetscOptionsEnd(); 7622 } 7623 break; 7624 case MATPRODUCT_AtB: 7625 if (product->api_user) { 7626 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7627 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7628 PetscOptionsEnd(); 7629 } else { 7630 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7631 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7632 PetscOptionsEnd(); 7633 } 7634 break; 7635 case MATPRODUCT_PtAP: 7636 if (product->api_user) { 7637 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7638 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7639 PetscOptionsEnd(); 7640 } else { 7641 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7642 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7643 PetscOptionsEnd(); 7644 } 7645 break; 7646 default: 7647 break; 7648 } 7649 match = (PetscBool)!usecpu; 7650 } 7651 #endif 7652 if (match) { 7653 switch (product->type) { 7654 case MATPRODUCT_AB: 7655 case MATPRODUCT_AtB: 7656 case MATPRODUCT_PtAP: 7657 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7658 break; 7659 default: 7660 break; 7661 } 7662 } 7663 /* fallback to MPIAIJ ops */ 7664 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7665 PetscFunctionReturn(PETSC_SUCCESS); 7666 } 7667 7668 /* 7669 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7670 7671 n - the number of block indices in cc[] 7672 cc - the block indices (must be large enough to contain the indices) 7673 */ 7674 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7675 { 7676 PetscInt cnt = -1, nidx, j; 7677 const PetscInt *idx; 7678 7679 PetscFunctionBegin; 7680 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7681 if (nidx) { 7682 cnt = 0; 7683 cc[cnt] = idx[0] / bs; 7684 for (j = 1; j < nidx; j++) { 7685 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7686 } 7687 } 7688 
PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7689 *n = cnt + 1; 7690 PetscFunctionReturn(PETSC_SUCCESS); 7691 } 7692 7693 /* 7694 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7695 7696 ncollapsed - the number of block indices 7697 collapsed - the block indices (must be large enough to contain the indices) 7698 */ 7699 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7700 { 7701 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7702 7703 PetscFunctionBegin; 7704 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7705 for (i = start + 1; i < start + bs; i++) { 7706 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7707 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7708 cprevtmp = cprev; 7709 cprev = merged; 7710 merged = cprevtmp; 7711 } 7712 *ncollapsed = nprev; 7713 if (collapsed) *collapsed = cprev; 7714 PetscFunctionReturn(PETSC_SUCCESS); 7715 } 7716 7717 /* 7718 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7719 7720 Input Parameter: 7721 . Amat - matrix 7722 - symmetrize - make the result symmetric 7723 + scale - scale with diagonal 7724 7725 Output Parameter: 7726 . a_Gmat - output scalar graph >= 0 7727 7728 */ 7729 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7730 { 7731 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7732 MPI_Comm comm; 7733 Mat Gmat; 7734 PetscBool ismpiaij, isseqaij; 7735 Mat a, b, c; 7736 MatType jtype; 7737 7738 PetscFunctionBegin; 7739 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7740 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7741 PetscCall(MatGetSize(Amat, &MM, &NN)); 7742 PetscCall(MatGetBlockSize(Amat, &bs)); 7743 nloc = (Iend - Istart) / bs; 7744 7745 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7746 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7747 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7748 7749 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7750 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7751 implementation */ 7752 if (bs > 1) { 7753 PetscCall(MatGetType(Amat, &jtype)); 7754 PetscCall(MatCreate(comm, &Gmat)); 7755 PetscCall(MatSetType(Gmat, jtype)); 7756 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7757 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7758 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7759 PetscInt *d_nnz, *o_nnz; 7760 MatScalar *aa, val, *AA; 7761 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7762 7763 if (isseqaij) { 7764 a = Amat; 7765 b = NULL; 7766 } else { 7767 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7768 a = d->A; 7769 b = d->B; 7770 } 7771 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7772 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7773 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7774 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7775 const PetscInt *cols1, *cols2; 7776 7777 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7778 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7779 nnz[brow / bs] = nc2 / bs; 7780 if (nc2 % bs) ok = 0; 7781 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7782 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7783 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7784 if (nc1 != nc2) ok = 0; 7785 else { 7786 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7787 if (cols1[jj] != cols2[jj]) ok = 0; 7788 if (cols1[jj] % bs != jj % bs) ok = 0; 7789 } 7790 } 7791 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7792 } 7793 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7794 if (!ok) { 7795 PetscCall(PetscFree2(d_nnz, o_nnz)); 7796 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7797 goto old_bs; 7798 } 7799 } 7800 } 7801 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7802 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7803 PetscCall(PetscFree2(d_nnz, o_nnz)); 7804 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7805 // diag 7806 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7807 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7808 7809 ai = aseq->i; 7810 n = ai[brow + 1] - ai[brow]; 7811 aj = aseq->j + ai[brow]; 7812 for (PetscInt k = 0; k < n; k += bs) { // block columns 7813 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7814 val = 0; 7815 if (index_size == 0) { 7816 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7817 aa = aseq->a + ai[brow + ii] + k; 7818 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7819 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7820 } 7821 } 7822 } else { // use (index,index) value if provided 7823 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7824 PetscInt ii = index[iii]; 7825 aa = aseq->a + ai[brow + ii] + k; 7826 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7827 PetscInt jj = index[jjj]; 7828 val += PetscAbs(PetscRealPart(aa[jj])); 7829 } 7830 } 7831 } 7832 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7833 AA[k / bs] = val; 7834 } 7835 grow = Istart / bs + brow / bs; 7836 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7837 } 7838 // off-diag 7839 if (ismpiaij) { 7840 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7841 const PetscScalar *vals; 7842 const PetscInt *cols, *garray = aij->garray; 7843 7844 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7845 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7846 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7847 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7848 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7849 AA[k / bs] = 0; 7850 AJ[cidx] = garray[cols[k]] / bs; 7851 } 7852 nc = ncols / bs; 7853 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7854 if (index_size == 0) { 7855 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7856 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7857 for (PetscInt k = 0; k < ncols; k += bs) { 7858 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7859 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7860 AA[k / bs] 
+= PetscAbs(PetscRealPart(vals[k + jj])); 7861 } 7862 } 7863 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7864 } 7865 } else { // use (index,index) value if provided 7866 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7867 PetscInt ii = index[iii]; 7868 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7869 for (PetscInt k = 0; k < ncols; k += bs) { 7870 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7871 PetscInt jj = index[jjj]; 7872 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7873 } 7874 } 7875 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7876 } 7877 } 7878 grow = Istart / bs + brow / bs; 7879 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7880 } 7881 } 7882 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7883 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7884 PetscCall(PetscFree2(AA, AJ)); 7885 } else { 7886 const PetscScalar *vals; 7887 const PetscInt *idx; 7888 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7889 old_bs: 7890 /* 7891 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7892 */ 7893 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7894 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7895 if (isseqaij) { 7896 PetscInt max_d_nnz; 7897 7898 /* 7899 Determine exact preallocation count for (sequential) scalar matrix 7900 */ 7901 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7902 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7903 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7904 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7905 PetscCall(PetscFree3(w0, w1, w2)); 7906 } else if (ismpiaij) { 7907 Mat Daij, Oaij; 7908 const PetscInt *garray; 7909 PetscInt max_d_nnz; 7910 7911 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7912 /* 7913 Determine exact preallocation count for diagonal block portion of scalar matrix 7914 */ 7915 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7916 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7917 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7918 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7919 PetscCall(PetscFree3(w0, w1, w2)); 7920 /* 7921 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7922 */ 7923 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7924 o_nnz[jj] = 0; 7925 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7926 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7927 o_nnz[jj] += ncols; 7928 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7929 } 7930 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7931 } 7932 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7933 /* get scalar copy (norms) of matrix */ 7934 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7935 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7936 PetscCall(PetscFree2(d_nnz, o_nnz)); 7937 for (Ii = Istart; Ii < Iend; Ii++) { 7938 PetscInt dest_row = Ii / bs; 7939 7940 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7941 for (jj = 0; jj < ncols; jj++) { 7942 PetscInt dest_col = idx[jj] / bs; 7943 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7944 7945 PetscCall(MatSetValues(Gmat, 1, 
&dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7946 } 7947 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7948 } 7949 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7950 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7951 } 7952 } else { 7953 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7954 else { 7955 Gmat = Amat; 7956 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7957 } 7958 if (isseqaij) { 7959 a = Gmat; 7960 b = NULL; 7961 } else { 7962 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7963 a = d->A; 7964 b = d->B; 7965 } 7966 if (filter >= 0 || scale) { 7967 /* take absolute value of each entry */ 7968 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7969 MatInfo info; 7970 PetscScalar *avals; 7971 7972 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7973 PetscCall(MatSeqAIJGetArray(c, &avals)); 7974 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7975 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7976 } 7977 } 7978 } 7979 if (symmetrize) { 7980 PetscBool isset, issym; 7981 7982 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7983 if (!isset || !issym) { 7984 Mat matTrans; 7985 7986 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7987 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7988 PetscCall(MatDestroy(&matTrans)); 7989 } 7990 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7991 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7992 if (scale) { 7993 /* scale c for all diagonal values = 1 or -1 */ 7994 Vec diag; 7995 7996 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7997 PetscCall(MatGetDiagonal(Gmat, diag)); 7998 PetscCall(VecReciprocal(diag)); 7999 PetscCall(VecSqrtAbs(diag)); 8000 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8001 PetscCall(VecDestroy(&diag)); 8002 } 8003 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8004 if (filter >= 0) { 8005 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8006 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8007 } 8008 *a_Gmat = Gmat; 8009 PetscFunctionReturn(PETSC_SUCCESS); 8010 } 8011 8012 PETSC_INTERN PetscErrorCode MatGetCurrentMemType_MPIAIJ(Mat A, PetscMemType *memtype) 8013 { 8014 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 8015 PetscMemType mD = PETSC_MEMTYPE_HOST, mO = PETSC_MEMTYPE_HOST; 8016 8017 PetscFunctionBegin; 8018 if (mpiaij->A) PetscCall(MatGetCurrentMemType(mpiaij->A, &mD)); 8019 if (mpiaij->B) PetscCall(MatGetCurrentMemType(mpiaij->B, &mO)); 8020 *memtype = (mD == mO) ? mD : PETSC_MEMTYPE_HOST; 8021 PetscFunctionReturn(PETSC_SUCCESS); 8022 } 8023 8024 /* 8025 Special version for direct calls from Fortran 8026 */ 8027 8028 /* Change these macros so can be used in void function */ 8029 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8030 #undef PetscCall 8031 #define PetscCall(...) \ 8032 do { \ 8033 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8034 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8035 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8036 return; \ 8037 } \ 8038 } while (0) 8039 8040 #undef SETERRQ 8041 #define SETERRQ(comm, ierr, ...) 
\ 8042 do { \ 8043 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8044 return; \ 8045 } while (0) 8046 8047 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8048 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8049 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8050 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8051 #else 8052 #endif 8053 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8054 { 8055 Mat mat = *mmat; 8056 PetscInt m = *mm, n = *mn; 8057 InsertMode addv = *maddv; 8058 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8059 PetscScalar value; 8060 8061 MatCheckPreallocated(mat, 1); 8062 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8063 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8064 { 8065 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8066 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8067 PetscBool roworiented = aij->roworiented; 8068 8069 /* Some Variables required in the macro */ 8070 Mat A = aij->A; 8071 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8072 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8073 MatScalar *aa; 8074 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8075 Mat B = aij->B; 8076 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8077 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8078 MatScalar *ba; 8079 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8080 * cannot use "#if defined" inside a macro. 
*/ 8081 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8082 8083 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8084 PetscInt nonew = a->nonew; 8085 MatScalar *ap1, *ap2; 8086 8087 PetscFunctionBegin; 8088 PetscCall(MatSeqAIJGetArray(A, &aa)); 8089 PetscCall(MatSeqAIJGetArray(B, &ba)); 8090 for (i = 0; i < m; i++) { 8091 if (im[i] < 0) continue; 8092 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8093 if (im[i] >= rstart && im[i] < rend) { 8094 row = im[i] - rstart; 8095 lastcol1 = -1; 8096 rp1 = aj + ai[row]; 8097 ap1 = aa + ai[row]; 8098 rmax1 = aimax[row]; 8099 nrow1 = ailen[row]; 8100 low1 = 0; 8101 high1 = nrow1; 8102 lastcol2 = -1; 8103 rp2 = bj + bi[row]; 8104 ap2 = ba + bi[row]; 8105 rmax2 = bimax[row]; 8106 nrow2 = bilen[row]; 8107 low2 = 0; 8108 high2 = nrow2; 8109 8110 for (j = 0; j < n; j++) { 8111 if (roworiented) value = v[i * n + j]; 8112 else value = v[i + j * m]; 8113 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8114 if (in[j] >= cstart && in[j] < cend) { 8115 col = in[j] - cstart; 8116 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8117 } else if (in[j] < 0) continue; 8118 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8119 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8120 } else { 8121 if (mat->was_assembled) { 8122 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8123 #if defined(PETSC_USE_CTABLE) 8124 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8125 col--; 8126 #else 8127 col = aij->colmap[in[j]] - 1; 8128 #endif 8129 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8130 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8131 col = in[j]; 8132 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8133 B = aij->B; 8134 b = (Mat_SeqAIJ *)B->data; 8135 bimax = b->imax; 8136 bi = b->i; 8137 bilen = b->ilen; 8138 bj = b->j; 8139 rp2 = bj + bi[row]; 8140 ap2 = ba + bi[row]; 8141 rmax2 = bimax[row]; 8142 nrow2 = bilen[row]; 8143 low2 = 0; 8144 high2 = nrow2; 8145 bm = aij->B->rmap->n; 8146 ba = b->a; 8147 inserted = PETSC_FALSE; 8148 } 8149 } else col = in[j]; 8150 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8151 } 8152 } 8153 } else if (!aij->donotstash) { 8154 if (roworiented) { 8155 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8156 } else { 8157 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8158 } 8159 } 8160 } 8161 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8162 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8163 } 8164 PetscFunctionReturnVoid(); 8165 } 8166 8167 /* Undefining these here since they were redefined from their original definition above! No 8168 * other PETSc functions should be defined past this point, as it is impossible to recover the 8169 * original definitions */ 8170 #undef PetscCall 8171 #undef SETERRQ 8172
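
/*
   How the MPIAIJ backend product code above is typically reached (a minimal sketch, not a PETSc example;
   A and P are assumed to be assembled matrices of an MPIAIJ-based type on the same communicator, and
   error checking is omitted):

     Mat C;

     MatProductCreate(A, P, NULL, &C);
     MatProductSetType(C, MATPRODUCT_PtAP);
     MatProductSetFromOptions(C);   // may select MatProductSymbolic_MPIAIJBACKEND()
     MatProductSymbolic(C);
     MatProductNumeric(C);

   or, at the user level, MatPtAP(A, P, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C).
*/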