#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/* Frees all data owned by an MPIAIJ matrix (stash, diagonal block A, off-diagonal block B,
   column map, garray, communication vector/scatter, row work arrays) without freeing the
   Mat header or mat->data itself; shared by MatDestroy_MPIAIJ() and MatResetHash_MPIAIJ() */
static PetscErrorCode MatReset_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Discards the assembled data and returns the matrix to the hash-based insertion
   mode provided by mpihashmat.h, bumping the component nonzero states so the
   parent matrix is seen as structurally changed */
static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  /* Save the nonzero states of the component matrices because those are what are used to determine
     the nonzero state of mat */
  PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;

  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));
  PetscCall(MatSetUp_MPI_Hash(mat));
  aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor: frees all MPIAIJ data and removes every composed function/object so the
   type can be changed or the header reused */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" was already composed to NULL above; this duplicate is harmless */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Builds a merged (diag + off-diag) sequential matrix B and forwards MatGetRowIJ() to it;
   B is kept alive via composition on A so MatRestoreRowIJ_MPIAIJ() can find it again
   (the MatDestroy() below only drops this function's local reference) */
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatGetRowIJ_MPIAIJ(): recovers the composed merged matrix, restores its
   row IJ arrays, then drops the last reference by composing NULL */
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.
   As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Propagates a CPU-binding request to the component matrices and work vectors */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forwards block-size changes to the component matrices; the off-diagonal block B
   always keeps column block size 1 since its columns are the scattered ghost columns */
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Creates an IS of the global indices of local rows that contain at least one
   (numerically) nonzero entry in either the diagonal or off-diagonal block;
   *keptrows is left NULL when every row on every process is nonzero */
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows; /* cnt counts locally zero rows */
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count rows whose stored entries are all zero (or that store nothing) */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  /* n0rows is the global number of zero rows; if there are none everywhere, return NULL */
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* second pass: collect the global indices of the locally nonzero rows */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Sets the diagonal of Y from vector D; fast path goes straight to the diagonal block A
   when the layouts are congruent and Y is assembled, otherwise falls back to the generic
   implementation */
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Creates an IS of the global indices of local rows whose diagonal entry is zero
   (found by scanning only the diagonal block A) */
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Computes per-column reductions (norms, sums, or means of real/imaginary parts) of the
   whole parallel matrix; every process gets the full length-n result via MPI_Allreduce */
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;
  PetscMPIInt        in;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): these paired get/restore calls touch no data; presumably they exist to
     sync device values to the host before a->a/b->a are read directly below — confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  PetscCall(PetscMPIIntCast(n, &in));
  /* MAX combines per-process partial infinity norms; SUM combines everything else */
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Creates an IS of the global row indices that have entries outside the block diagonal:
   union of the diagonal block's off-block-diagonal rows and all rows with off-diagonal
   (off-process-column) entries */
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate both local index lists, sort, drop duplicates, shift to global numbering */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it it is not scalable (each processor
    has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* keys and values are stored shifted by +1 so 0 can mean "not present" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Inserts/adds one value into the diagonal block A; relies on the caller (MatSetValues_MPIAIJ)
   declaring rp1/ap1/nrow1/low1/high1/lastcol1/nonew etc.; performs a binary-then-linear
   search of the sorted row and reallocates via MatSeqXAIJReallocateAIJ when a new nonzero
   must be inserted */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B
   (rp2/ap2/nrow2/... variables, b_noinsert label) */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/* Copies a full locally-owned row given in the global-column ordering
   [left-of-diagonal B part | diagonal A part | right-of-diagonal B part]
   directly into the existing nonzero slots of blocks A and B */
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetValues() implementation for MPIAIJ: locally owned rows are routed to the
   diagonal block A or off-diagonal block B (via the colmap when assembled); rows
   owned by other processes are deposited into the stash for communication during
   assembly. May trigger disassembly of B when a new off-diagonal column appears. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij         = (Mat_MPIAIJ *)mat->data;
  PetscScalar value       = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B     = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned row */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column lands in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash values for communication at assembly time */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
  Mat          A    = aij->A; /* diagonal part of the matrix */
  Mat          B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetValues() implementation for MPIAIJ; only locally owned rows may be queried.
   Off-diagonal columns are translated through the colmap; columns not present in the
   off-diagonal block read back as 0.0 */
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* column not stored in the off-diagonal block: value is 0.0 by definition */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Starts communication of stashed off-process entries (unless stashing is disabled) */
static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Finishes assembly: drains the stash into local blocks, assembles A and B, handles
   collective disassembly agreement, sets up the multiply machinery on first final
   assembly, and updates the global nonzero state */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zeros all stored values in both component matrices (structure is kept) */
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zeros the locally owned rows listed in rows[], optionally placing diag on the diagonal
   and fixing up the right-hand side b from x (function continues beyond this chunk) */
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if
keepnonzeropattern is false, we allow for new insertion */ 893 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 894 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 895 PetscInt nnwA, nnwB; 896 PetscBool nnzA, nnzB; 897 898 nnwA = aijA->nonew; 899 nnwB = aijB->nonew; 900 nnzA = aijA->keepnonzeropattern; 901 nnzB = aijB->keepnonzeropattern; 902 if (!nnzA) { 903 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 904 aijA->nonew = 0; 905 } 906 if (!nnzB) { 907 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 908 aijB->nonew = 0; 909 } 910 /* Must zero here before the next loop */ 911 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 912 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 913 for (r = 0; r < len; ++r) { 914 const PetscInt row = lrows[r] + A->rmap->rstart; 915 if (row >= A->cmap->N) continue; 916 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 917 } 918 aijA->nonew = nnwA; 919 aijB->nonew = nnwB; 920 } else { 921 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 922 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 923 } 924 PetscCall(PetscFree(lrows)); 925 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 926 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 927 928 /* only change matrix nonzero state if pattern was allowed to be changed */ 929 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 930 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 931 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 932 } 933 PetscFunctionReturn(PETSC_SUCCESS); 934 } 935 936 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 
937 { 938 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 939 PetscInt n = A->rmap->n; 940 PetscInt i, j, r, m, len = 0; 941 PetscInt *lrows, *owners = A->rmap->range; 942 PetscMPIInt p = 0; 943 PetscSFNode *rrows; 944 PetscSF sf; 945 const PetscScalar *xx; 946 PetscScalar *bb, *mask, *aij_a; 947 Vec xmask, lmask; 948 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 949 const PetscInt *aj, *ii, *ridx; 950 PetscScalar *aa; 951 952 PetscFunctionBegin; 953 /* Create SF where leaves are input rows and roots are owned rows */ 954 PetscCall(PetscMalloc1(n, &lrows)); 955 for (r = 0; r < n; ++r) lrows[r] = -1; 956 PetscCall(PetscMalloc1(N, &rrows)); 957 for (r = 0; r < N; ++r) { 958 const PetscInt idx = rows[r]; 959 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 960 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 961 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 962 } 963 rrows[r].rank = p; 964 rrows[r].index = rows[r] - owners[p]; 965 } 966 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 967 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 968 /* Collect flags for rows to be zeroed */ 969 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 970 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 971 PetscCall(PetscSFDestroy(&sf)); 972 /* Compress and put in row numbers */ 973 for (r = 0; r < n; ++r) 974 if (lrows[r] >= 0) lrows[len++] = r; 975 /* zero diagonal part of matrix */ 976 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 977 /* handle off-diagonal part of matrix */ 978 PetscCall(MatCreateVecs(A, &xmask, NULL)); 979 PetscCall(VecDuplicate(l->lvec, &lmask)); 980 PetscCall(VecGetArray(xmask, &bb)); 981 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 982 PetscCall(VecRestoreArray(xmask, 
&bb)); 983 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 984 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 985 PetscCall(VecDestroy(&xmask)); 986 if (x && b) { /* this code is buggy when the row and column layout don't match */ 987 PetscBool cong; 988 989 PetscCall(MatHasCongruentLayouts(A, &cong)); 990 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 991 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 992 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 993 PetscCall(VecGetArrayRead(l->lvec, &xx)); 994 PetscCall(VecGetArray(b, &bb)); 995 } 996 PetscCall(VecGetArray(lmask, &mask)); 997 /* remove zeroed rows of off-diagonal matrix */ 998 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 999 ii = aij->i; 1000 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 1001 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1002 if (aij->compressedrow.use) { 1003 m = aij->compressedrow.nrows; 1004 ii = aij->compressedrow.i; 1005 ridx = aij->compressedrow.rindex; 1006 for (i = 0; i < m; i++) { 1007 n = ii[i + 1] - ii[i]; 1008 aj = aij->j + ii[i]; 1009 aa = aij_a + ii[i]; 1010 1011 for (j = 0; j < n; j++) { 1012 if (PetscAbsScalar(mask[*aj])) { 1013 if (b) bb[*ridx] -= *aa * xx[*aj]; 1014 *aa = 0.0; 1015 } 1016 aa++; 1017 aj++; 1018 } 1019 ridx++; 1020 } 1021 } else { /* do not use compressed row format */ 1022 m = l->B->rmap->n; 1023 for (i = 0; i < m; i++) { 1024 n = ii[i + 1] - ii[i]; 1025 aj = aij->j + ii[i]; 1026 aa = aij_a + ii[i]; 1027 for (j = 0; j < n; j++) { 1028 if (PetscAbsScalar(mask[*aj])) { 1029 if (b) bb[i] -= *aa * xx[*aj]; 1030 *aa = 0.0; 1031 } 1032 aa++; 1033 aj++; 1034 } 1035 } 1036 } 1037 if (x && b) { 1038 PetscCall(VecRestoreArray(b, &bb)); 1039 
PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1040 } 1041 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1042 PetscCall(VecRestoreArray(lmask, &mask)); 1043 PetscCall(VecDestroy(&lmask)); 1044 PetscCall(PetscFree(lrows)); 1045 1046 /* only change matrix nonzero state if pattern was allowed to be changed */ 1047 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1048 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1049 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1050 } 1051 PetscFunctionReturn(PETSC_SUCCESS); 1052 } 1053 1054 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1055 { 1056 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1057 PetscInt nt; 1058 VecScatter Mvctx = a->Mvctx; 1059 1060 PetscFunctionBegin; 1061 PetscCall(VecGetLocalSize(xx, &nt)); 1062 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1063 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscUseTypeMethod(a->A, mult, xx, yy); 1065 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1071 { 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073 1074 PetscFunctionBegin; 1075 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1076 PetscFunctionReturn(PETSC_SUCCESS); 1077 } 1078 1079 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1080 { 1081 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1082 VecScatter Mvctx = a->Mvctx; 1083 1084 PetscFunctionBegin; 1085 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1086 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1087 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, 
SCATTER_FORWARD)); 1088 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1089 PetscFunctionReturn(PETSC_SUCCESS); 1090 } 1091 1092 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1093 { 1094 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1095 1096 PetscFunctionBegin; 1097 /* do nondiagonal part */ 1098 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1099 /* do local part */ 1100 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1101 /* add partial results together */ 1102 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1103 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1104 PetscFunctionReturn(PETSC_SUCCESS); 1105 } 1106 1107 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1108 { 1109 MPI_Comm comm; 1110 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1111 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1112 IS Me, Notme; 1113 PetscInt M, N, first, last, *notme, i; 1114 PetscBool lf; 1115 PetscMPIInt size; 1116 1117 PetscFunctionBegin; 1118 /* Easy test: symmetric diagonal block */ 1119 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1120 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1121 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1122 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1123 PetscCallMPI(MPI_Comm_size(comm, &size)); 1124 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1125 1126 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1127 PetscCall(MatGetSize(Amat, &M, &N)); 1128 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1129 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1130 for (i = 0; i < first; i++) notme[i] = i; 1131 for (i = last; i < M; i++) notme[i - last + first] = i; 1132 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1133 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1134 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1135 Aoff = Aoffs[0]; 1136 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1137 Boff = Boffs[0]; 1138 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1139 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1140 PetscCall(MatDestroyMatrices(1, &Boffs)); 1141 PetscCall(ISDestroy(&Me)); 1142 PetscCall(ISDestroy(&Notme)); 1143 PetscCall(PetscFree(notme)); 1144 PetscFunctionReturn(PETSC_SUCCESS); 1145 } 1146 1147 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1150 1151 PetscFunctionBegin; 1152 /* do nondiagonal part */ 1153 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1154 /* do local part */ 1155 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1156 /* add partial results together */ 1157 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1158 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1159 PetscFunctionReturn(PETSC_SUCCESS); 1160 } 1161 1162 /* 1163 This only works correctly for square matrices where the subblock A->A is the 1164 diagonal block 1165 */ 1166 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1167 { 1168 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1169 1170 PetscFunctionBegin; 1171 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1172 
PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1173 PetscCall(MatGetDiagonal(a->A, v)); 1174 PetscFunctionReturn(PETSC_SUCCESS); 1175 } 1176 1177 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1178 { 1179 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1180 1181 PetscFunctionBegin; 1182 PetscCall(MatScale(a->A, aa)); 1183 PetscCall(MatScale(a->B, aa)); 1184 PetscFunctionReturn(PETSC_SUCCESS); 1185 } 1186 1187 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1188 { 1189 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1190 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1191 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1192 const PetscInt *garray = aij->garray; 1193 const PetscScalar *aa, *ba; 1194 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1195 PetscInt64 nz, hnz; 1196 PetscInt *rowlens; 1197 PetscInt *colidxs; 1198 PetscScalar *matvals; 1199 PetscMPIInt rank; 1200 1201 PetscFunctionBegin; 1202 PetscCall(PetscViewerSetUp(viewer)); 1203 1204 M = mat->rmap->N; 1205 N = mat->cmap->N; 1206 m = mat->rmap->n; 1207 rs = mat->rmap->rstart; 1208 cs = mat->cmap->rstart; 1209 nz = A->nz + B->nz; 1210 1211 /* write matrix header */ 1212 header[0] = MAT_FILE_CLASSID; 1213 header[1] = M; 1214 header[2] = N; 1215 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1216 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1217 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1218 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1219 1220 /* fill in and store row lengths */ 1221 PetscCall(PetscMalloc1(m, &rowlens)); 1222 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1223 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1224 PetscCall(PetscFree(rowlens)); 1225 1226 /* fill in and store column 
indices */ 1227 PetscCall(PetscMalloc1(nz, &colidxs)); 1228 for (cnt = 0, i = 0; i < m; i++) { 1229 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1230 if (garray[B->j[jb]] > cs) break; 1231 colidxs[cnt++] = garray[B->j[jb]]; 1232 } 1233 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1234 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1235 } 1236 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1237 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1238 PetscCall(PetscFree(colidxs)); 1239 1240 /* fill in and store nonzero values */ 1241 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1242 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1243 PetscCall(PetscMalloc1(nz, &matvals)); 1244 for (cnt = 0, i = 0; i < m; i++) { 1245 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1246 if (garray[B->j[jb]] > cs) break; 1247 matvals[cnt++] = ba[jb]; 1248 } 1249 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1250 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1251 } 1252 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1253 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1254 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1255 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1256 PetscCall(PetscFree(matvals)); 1257 1258 /* write block size option to the viewer's .info file */ 1259 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1260 PetscFunctionReturn(PETSC_SUCCESS); 1261 } 1262 1263 #include <petscdraw.h> 1264 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1265 { 1266 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1267 PetscMPIInt rank = aij->rank, size = aij->size; 1268 PetscBool 
isdraw, iascii, isbinary; 1269 PetscViewer sviewer; 1270 PetscViewerFormat format; 1271 1272 PetscFunctionBegin; 1273 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1274 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1275 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1276 if (iascii) { 1277 PetscCall(PetscViewerGetFormat(viewer, &format)); 1278 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1279 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1280 PetscCall(PetscMalloc1(size, &nz)); 1281 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1282 for (i = 0; i < size; i++) { 1283 nmax = PetscMax(nmax, nz[i]); 1284 nmin = PetscMin(nmin, nz[i]); 1285 navg += nz[i]; 1286 } 1287 PetscCall(PetscFree(nz)); 1288 navg = navg / size; 1289 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1290 PetscFunctionReturn(PETSC_SUCCESS); 1291 } 1292 PetscCall(PetscViewerGetFormat(viewer, &format)); 1293 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1294 MatInfo info; 1295 PetscInt *inodes = NULL; 1296 1297 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1298 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1299 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1300 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1301 if (!inodes) { 1302 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1303 info.memory)); 1304 } else { 1305 PetscCall( 1306 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" 
PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1307 } 1308 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1310 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1311 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1312 PetscCall(PetscViewerFlush(viewer)); 1313 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1314 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1315 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1316 PetscFunctionReturn(PETSC_SUCCESS); 1317 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1318 PetscInt inodecount, inodelimit, *inodes; 1319 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1320 if (inodes) { 1321 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1322 } else { 1323 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1324 } 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1327 PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 } else if (isbinary) { 1330 if (size == 1) { 1331 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1332 PetscCall(MatView(aij->A, viewer)); 1333 } else { 1334 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1335 } 1336 PetscFunctionReturn(PETSC_SUCCESS); 1337 } else if (iascii && size == 1) { 1338 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1339 PetscCall(MatView(aij->A, viewer)); 1340 
PetscFunctionReturn(PETSC_SUCCESS); 1341 } else if (isdraw) { 1342 PetscDraw draw; 1343 PetscBool isnull; 1344 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1345 PetscCall(PetscDrawIsNull(draw, &isnull)); 1346 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1347 } 1348 1349 { /* assemble the entire matrix onto first processor */ 1350 Mat A = NULL, Av; 1351 IS isrow, iscol; 1352 1353 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1354 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1355 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1356 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1357 /* The commented code uses MatCreateSubMatrices instead */ 1358 /* 1359 Mat *AA, A = NULL, Av; 1360 IS isrow,iscol; 1361 1362 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1363 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1364 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1365 if (rank == 0) { 1366 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1367 A = AA[0]; 1368 Av = AA[0]; 1369 } 1370 PetscCall(MatDestroySubMatrices(1,&AA)); 1371 */ 1372 PetscCall(ISDestroy(&iscol)); 1373 PetscCall(ISDestroy(&isrow)); 1374 /* 1375 Everyone has to call to draw the matrix since the graphics waits are 1376 synchronized across all processors that share the PetscDraw object 1377 */ 1378 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1379 if (rank == 0) { 1380 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1381 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1382 } 1383 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1384 PetscCall(MatDestroy(&A)); 1385 } 1386 PetscFunctionReturn(PETSC_SUCCESS); 1387 } 1388 1389 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1390 { 1391 PetscBool iascii, isdraw, issocket, isbinary; 1392 1393 PetscFunctionBegin; 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1395 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1396 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1397 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1398 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1399 PetscFunctionReturn(PETSC_SUCCESS); 1400 } 1401 1402 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1403 { 1404 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1405 Vec bb1 = NULL; 1406 PetscBool hasop; 1407 1408 PetscFunctionBegin; 1409 if (flag == SOR_APPLY_UPPER) { 1410 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, 
fshift, lits, 1, xx)); 1411 PetscFunctionReturn(PETSC_SUCCESS); 1412 } 1413 1414 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1415 1416 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1417 if (flag & SOR_ZERO_INITIAL_GUESS) { 1418 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1419 its--; 1420 } 1421 1422 while (its--) { 1423 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1424 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1425 1426 /* update rhs: bb1 = bb - B*x */ 1427 PetscCall(VecScale(mat->lvec, -1.0)); 1428 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1429 1430 /* local sweep */ 1431 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1432 } 1433 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1434 if (flag & SOR_ZERO_INITIAL_GUESS) { 1435 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1436 its--; 1437 } 1438 while (its--) { 1439 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1440 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1441 1442 /* update rhs: bb1 = bb - B*x */ 1443 PetscCall(VecScale(mat->lvec, -1.0)); 1444 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1445 1446 /* local sweep */ 1447 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1448 } 1449 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1450 if (flag & SOR_ZERO_INITIAL_GUESS) { 1451 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1452 its--; 1453 } 1454 while (its--) { 1455 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1456 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1457 
      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* Eisenstat trick: one local backward sweep with zero initial guess, then combine with a forward sweep below */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    /* gather the off-process part of xx needed by the off-diagonal block B */
    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily create and cache the diagonal of the matrix */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatPermute_MPIAIJ - Builds *B as A with its rows permuted by rowp and columns permuted by colp.

  Strategy: use star forests (PetscSF) to (1) invert the distributed row and column permutations,
  (2) translate the compressed off-process ("ghost") column indices of the B block to their permuted
  global columns, (3) communicate per-row diagonal/off-diagonal nonzero counts for preallocation,
  and finally fill the new matrix with MatSetValues().
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* NOTE(review): never assigned in this function as visible here, so the ISDestroy() at the end looks dead — confirm against full file */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is sized for both the row and column inversion passes below */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros of each (pre-permutation) local row at its destination */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Ship the counts to the ranks that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetGhosts_MPIAIJ - Returns the number of ghost (off-process) columns and, optionally, a
  borrowed pointer to their global indices (garray); the caller must not free it.
*/
static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetInfo_MPIAIJ - Accumulates matrix statistics over the diagonal (A) and off-diagonal (B)
  blocks, then reduces across ranks according to flag (MAT_LOCAL / MAT_GLOBAL_MAX / MAT_GLOBAL_SUM).
*/
static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  /* add in the contribution of the off-diagonal block */
  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    /* no reduction: report this rank's combined A+B totals */
    info->nz_used = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded = isend[2];
    info->memory = isend[3];
    info->mallocs = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded = irecv[2];
    info->memory = irecv[3];
    info->mallocs = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded = irecv[2];
    info->memory = irecv[3];
    info->mallocs = irecv[4];
  }
  info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetOption_MPIAIJ - Dispatches a matrix option to the appropriate place: most options are
  forwarded to both sequential blocks; a few set flags on the parallel matrix itself; symmetry
  flags are handled by the generic MatSetOption() caller and need no action here.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    /* forwarded verbatim to both the diagonal and off-diagonal blocks */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix as a single sorted list of
  global column indices and values, merged from the diagonal (A) and off-diagonal (B) blocks.
  Must be paired with MatRestoreRow_MPIAIJ(); only one row may be active at a time.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt max = 1, tmp;
    /* scan the CSR row pointers of both blocks for the widest combined row */
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies before the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ() (buffers are cached, nothing to free) */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum) or infinity- (max row sum) norm of the
  parallel matrix by combining the entries of the A and B blocks and reducing across ranks.
  The 2-norm is not supported.
*/
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;
  PetscMPIInt      iN;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single rank: defer entirely to the sequential implementation */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt *jj, *garray = aij->garray;
      /* accumulate per-global-column absolute sums, then reduce over ranks */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v = amata;
      jj = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz;
j++) {
        /* garray maps the compressed B column index to its global column */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
      PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatTranspose_MPIAIJ - Forms the transpose of A. The diagonal block is transposed locally in place
  (fast path); the off-diagonal block is transposed by setting its entries columnwise into the result
  with MatSetValues(). Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and in-place (*matout == A).
*/
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* need to create (and preallocate) the result matrix */
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b = (Mat_MPIAIJ *)B->data;
  A_diag = a->A;
  B_diag = &b->A;
  sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    /* set row i of B (one column of the transpose) as a column of the result */
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's innards with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDiagonalScale_MPIAIJ - Scales the matrix rows by ll and columns by rr (either may be NULL).
  The diagonal block is scaled on both sides; the off-diagonal block is row-scaled by ll and
  column-scaled by the scattered ghost values of rr.
*/
static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
 */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetUnfactored_MPIAIJ - Clears the factored state; only the diagonal block carries it here */
static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatEqual_MPIAIJ - Compares A and B blockwise on each rank, then takes a logical AND across the
  communicator so every rank returns the same answer.
*/
static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCopy_MPIAIJ - Copies A into B. Falls back to the generic MatCopy_Basic() unless both matrices
  share the same copy implementation and nonzero pattern, in which case the blocks are copied directly.
*/
static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure. Column indices are compared after mapping
   through the supplied local-to-global maps (xltog/yltog), merging the two sorted
   rows and counting duplicates once.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
    nzx = xi[i + 1] - xi[i];
    nzy = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatAXPY_MPIAIJ - Computes Y = a*X + Y. Fast paths exist when the nonzero patterns are the same
  or X's is a subset of Y's; otherwise a new matrix with the merged pattern is preallocated and
  the generic kernel fills it, after which it replaces Y in place via MatHeaderMerge().
*/
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* MatConjugate_MPIAIJ - Conjugates both blocks; a no-op for real builds */
static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }
PetscFunctionReturn(PETSC_SUCCESS); 2148 } 2149 2150 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatRealPart(a->A)); 2156 PetscCall(MatRealPart(a->B)); 2157 PetscFunctionReturn(PETSC_SUCCESS); 2158 } 2159 2160 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2163 2164 PetscFunctionBegin; 2165 PetscCall(MatImaginaryPart(a->A)); 2166 PetscCall(MatImaginaryPart(a->B)); 2167 PetscFunctionReturn(PETSC_SUCCESS); 2168 } 2169 2170 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2171 { 2172 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2173 PetscInt i, *idxb = NULL, m = A->rmap->n; 2174 PetscScalar *va, *vv; 2175 Vec vB, vA; 2176 const PetscScalar *vb; 2177 2178 PetscFunctionBegin; 2179 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2180 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2181 2182 PetscCall(VecGetArrayWrite(vA, &va)); 2183 if (idx) { 2184 for (i = 0; i < m; i++) { 2185 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2186 } 2187 } 2188 2189 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2190 PetscCall(PetscMalloc1(m, &idxb)); 2191 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2192 2193 PetscCall(VecGetArrayWrite(v, &vv)); 2194 PetscCall(VecGetArrayRead(vB, &vb)); 2195 for (i = 0; i < m; i++) { 2196 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2197 vv[i] = vb[i]; 2198 if (idx) idx[i] = a->garray[idxb[i]]; 2199 } else { 2200 vv[i] = va[i]; 2201 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2202 } 2203 } 2204 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2205 PetscCall(VecRestoreArrayWrite(vA, &va)); 2206 PetscCall(VecRestoreArrayRead(vB, &vb)); 2207 PetscCall(PetscFree(idxb)); 2208 PetscCall(VecDestroy(&vA)); 2209 PetscCall(VecDestroy(&vB)); 2210 PetscFunctionReturn(PETSC_SUCCESS); 2211 } 2212 2213 static 
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
{
  /* Row sums of absolute values: sum the per-block results, which partition each row's entries */
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Vec         vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRowMinAbs_MPIAIJ - For each local row, puts the entry of minimum absolute value into v and,
  optionally, its global column index into idx. Because the B block stores only explicit nonzeros,
  an absent ("implicit") 0.0 in a row can be the minimum; the code searches the compressed column
  map for the first such hole.
*/
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: every row minimum is the implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* now scan the explicit entries of the row for a smaller |value| */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r] = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block minima; ties go to the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRowMin_MPIAIJ - For each local row, puts the minimum entry (compared by real part) into v
  and, optionally, its global column index into idx. Same implicit-zero handling as
  MatGetRowMinAbs_MPIAIJ above.
*/
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,
&bav)); 2388 ba = bav; 2389 bi = b->i; 2390 bj = b->j; 2391 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2392 for (r = 0; r < m; r++) { 2393 ncols = bi[r + 1] - bi[r]; 2394 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2395 offdiagA[r] = *ba; 2396 offdiagIdx[r] = cmap[0]; 2397 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2398 offdiagA[r] = 0.0; 2399 2400 /* Find first hole in the cmap */ 2401 for (j = 0; j < ncols; j++) { 2402 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2403 if (col > j && j < cstart) { 2404 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2405 break; 2406 } else if (col > j + n && j >= cstart) { 2407 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2408 break; 2409 } 2410 } 2411 if (j == ncols && ncols < A->cmap->N - n) { 2412 /* a hole is outside compressed Bcols */ 2413 if (ncols == 0) { 2414 if (cstart) { 2415 offdiagIdx[r] = 0; 2416 } else offdiagIdx[r] = cend; 2417 } else { /* ncols > 0 */ 2418 offdiagIdx[r] = cmap[ncols - 1] + 1; 2419 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2420 } 2421 } 2422 } 2423 2424 for (j = 0; j < ncols; j++) { 2425 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2426 offdiagA[r] = *ba; 2427 offdiagIdx[r] = cmap[*bj]; 2428 } 2429 ba++; 2430 bj++; 2431 } 2432 } 2433 2434 PetscCall(VecGetArrayWrite(v, &a)); 2435 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2436 for (r = 0; r < m; ++r) { 2437 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2438 a[r] = diagA[r]; 2439 if (idx) idx[r] = cstart + diagIdx[r]; 2440 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2441 a[r] = diagA[r]; 2442 if (idx) { 2443 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2444 idx[r] = cstart + diagIdx[r]; 2445 } else idx[r] = offdiagIdx[r]; 2446 } 2447 } else { 2448 a[r] = offdiagA[r]; 2449 if (idx) idx[r] = offdiagIdx[r]; 2450 } 2451 } 2452 PetscCall(MatSeqAIJRestoreArrayRead(B, 
&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Row-wise maximum of an MPIAIJ matrix: v[r] = max over the row, idx[r] (optional) = global
   column of that maximum. Mirror image of MatGetRowMin_MPIAIJ: combines the diagonal block
   mat->A with the off-diagonal block mat->B and accounts for implicit zeros in sparse rows. */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate to the sequential diagonal block in place */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      /* No local columns on this rank: report the sentinel value and index -1 */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored off-diagonal entries of this row for a larger value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block results; ties pick the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gathers the nonzero structure of the whole parallel matrix onto each rank as a
   sequential matrix (values are not copied). Ownership of *newmat passes to the caller. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Delegates block-diagonal inversion to the sequential diagonal block and propagates
   any factorization error state back to the parallel matrix */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fills the matrix with random values. For an unassembled (but preallocated) matrix the
   off-diagonal block must skip the local column range, which belongs to the diagonal block. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation backing MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the
   increaseoverlap function pointer between the scalable and default algorithms */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* local nonzeros = stored entries of the diagonal block plus the off-diagonal block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Processes MPIAIJ-specific runtime options (currently only -mat_increase_overlap_scalable) */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Adds a*I to the matrix, preallocating one diagonal entry per row first if needed */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* restore the new-nonzero policy clobbered by preallocation */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reports whether a diagonal entry is missing; *d (if requested) is converted from the
   diagonal block's local row number to the global row number */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Delegates variable-block-diagonal inversion to the sequential diagonal block */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Removes explicitly stored zeros from both blocks; `keep` only applies to the diagonal
   block since the off-diagonal block holds no diagonal coefficients */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function table for MATMPIAIJ; slots are positional (see _MatOps), the /*NN*​/ markers
   label every fifth slot. NULL means the operation is unsupported or set elsewhere. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};

/* Saves the current matrix values of both blocks so they can be restored later
   with MatRetrieveValues() (see MatStoreValues() man page) */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restores the matrix values of both blocks previously saved with MatStoreValues() */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation backing MatMPIAIJSetPreallocation(): (re)creates the sequential diagonal
   block b->A (local rows x local columns) and off-diagonal block b->B (local rows x all
   columns on more than one rank) and preallocates them with the given row-length hints.
   Any existing column map, ghost arrays, and scatter context are discarded. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leaving hash-based MatSetValues mode: restore the cached function table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* off-diagonal block is empty (zero columns) in the uniprocessor case */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Resets the preallocation of both blocks, keeping the nonzero pattern capacity but
   clearing assembly state; an assembled matrix is first disassembled so B's columns
   revert to the uncompressed (global) numbering */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
  else {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&b->colmap));
#else
    PetscCall(PetscFree(b->colmap));
#endif
    PetscCall(PetscFree(b->garray));
    PetscCall(VecDestroy(&b->lvec));
  }
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates an MPIAIJ matrix: copies layouts, duplicates or references the auxiliary
   structures (column map, ghost array, local vector, scatter) and duplicates both
   sequential blocks according to cpvalues */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-matrix MatGetRow() workspace is lazily allocated, not copied */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
       In fact, MatDuplicate only requires the matrix to be preallocated
       This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) {
      /* the scatter context is shared by reference, not duplicated */
      a->Mvctx = oldmat->Mvctx;
      PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
    }
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads an MPIAIJ matrix from a viewer; dispatches on the viewer type (binary or HDF5) */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads an MPIAIJ matrix from the PETSc binary format: header (classid, M, N, nz),
   per-row lengths, column indices, then values; each rank reads its own row slice */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0,
PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices (prefix sum turns lengths into CSR offsets) */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_INT_MAX) {
    /* sanity check: the sum of all row lengths must equal the header's nonzero count */
    PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns */
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* only if EVERY rank's iscol matches its owned column range can the gather be skipped */
  PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
+ mat - matrix
. isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->rstart <= isrow[i] < mat->rend
- iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

  Output Parameters:
+ isrow_d - sequential row index set for retrieving mat->A
. iscol_d - sequential column index set for retrieving mat->A
. iscol_o - sequential column index set for retrieving mat->B
- garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
*/
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of the local iscol sizes */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: the -1 padding marks ghost columns NOT selected by iscol */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 passes to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscInt *garray;
    PetscInt  BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?)
*/ 3318 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3319 3320 /* Create local submatrices Asub and Bsub */ 3321 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3322 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3323 3324 /* Create submatrix M */ 3325 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3326 3327 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3328 asub = (Mat_MPIAIJ *)M->data; 3329 3330 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3331 n = asub->B->cmap->N; 3332 if (BsubN > n) { 3333 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3334 const PetscInt *idx; 3335 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3336 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3337 3338 PetscCall(PetscMalloc1(n, &idx_new)); 3339 j = 0; 3340 PetscCall(ISGetIndices(iscol_o, &idx)); 3341 for (i = 0; i < n; i++) { 3342 if (j >= BsubN) break; 3343 while (subgarray[i] > garray[j]) j++; 3344 3345 if (subgarray[i] == garray[j]) { 3346 idx_new[i] = idx[j++]; 3347 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3348 } 3349 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3350 3351 PetscCall(ISDestroy(&iscol_o)); 3352 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3353 3354 } else if (BsubN < n) { 3355 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3356 } 3357 3358 PetscCall(PetscFree(garray)); 3359 *submat = M; 3360 3361 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the parallel submatrix mat[isrow, iscol].

   Fast paths: when isrow (and possibly iscol) have the same processor distribution as mat,
   dispatch to MatCreateSubMatrix_MPIAIJ_SameRowColDist() or MatCreateSubMatrix_MPIAIJ_SameRowDist(),
   which avoid building an index set with the global size of iscol on every rank.
   Otherwise fall through to the general (nonscalable) routine, which needs a sequential
   IS holding all of iscol's indices.

   Collective on mat. With MAT_INITIAL_MATRIX the helper index sets are composed with
   *newmat (keys "isrow_d", "SubIScol", "ISAllGather") so a later MAT_REUSE_MATRIX call
   can recover them and take the same code path. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed objects record which path the MAT_INITIAL_MATRIX call took; reuse must follow the same path */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local row indices fall inside this rank's ownership range -> same distribution */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* all ranks must agree on the path taken (logical AND across the communicator) */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local: fall through to the general case below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash iscol_local on the result so the MAT_REUSE_MATRIX path above can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. A - "diagonal" portion of matrix
. B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local "diagonal" widths across the communicator */
  PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* mark preallocated by hand: A and Bnew are installed directly below instead of via MatMPIAIJSetPreallocation() */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's local column indices to global indices in place (oj is B's own j array,
     which is reused by Bnew below) */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* ownership of the i/j/a arrays moves from B to Bnew: make sure destroying B does not free them,
     and that destroying Bnew eventually does */
  b->free_a  = PETSC_FALSE;
  b->free_ij = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->free_a  = PETSC_TRUE;
  bnew->free_ij = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

/* Extract mat[isrow, iscol] when isrow has the same processor distribution as mat.
   iscol_local must be a sorted sequential IS with the global size of iscol (may have
   duplicates); it may be NULL for MAT_REUSE_MATRIX, in which case the composed
   "SubIScol"/"Subcmap"/"SubMatrix" objects from the initial call are used. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the index sets and sequential submatrix composed by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* merge-walk the sorted iscol_local against the sorted garray, keeping indices this
         rank actually owns (diagonal block) or references (off-diagonal block) */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)Msub->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread Ncols columns as evenly as possible; the first Ncols%size ranks get one extra */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of local widths gives this rank's column-ownership range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* translate Msub's local column numbers to newmat's global columns via cmap */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* all ranks must agree on the allcolumns fast path */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix composed with *newmat by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread n columns as evenly as possible; the first n%size ranks get one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of local widths gives this rank's column-ownership range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* hand row i's slice of Mreuse's j/a arrays straight to MatSetValues; the
       SafePointerPlusOffset guards against advancing a NULL pointer when nz == 0 */
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind MatMPIAIJSetPreallocationCSR(): preallocates B
   from local CSR arrays (Ii may start at a nonzero offset irstart) and inserts the
   values row by row, then records in Aij->ld the number of entries in each row that
   lie below (to the left of) the diagonal block */
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m       = B->rmap->n;
  cstart  = B->cmap->rstart;
  cend    = B->cmap->rend;
  rstart  = B->rmap->rstart;
  irstart = Ii[0]; /* Ii need not start at 0; offsets into J/v are taken relative to Ii[0] */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* validate the CSR input: non-negative row lengths, column indices in [0, N) */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count per-row nonzeros in the diagonal block [cstart, cend) vs the off-diagonal block */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
  }
  /* all inserted rows are locally owned, so assembly needs no off-process communication;
     restore the user's setting afterwards */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    /* NOTE(review): assumes each row's column indices in J are sorted ascending — verify against callers */
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse
parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the implementation registered for B's type, e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ();
     PetscTryMethod() is a no-op when B's type does not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
  (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
  DIAGONAL portion of the local submatrix (possibly different for each row)
  or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
  The size of this array is equal to the number of local rows, i.e 'm'.
4056 For matrices that will be factored, you must leave room for (and set) 4057 the diagonal entry even if it is zero. 4058 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4059 submatrix (same value is used for all local rows). 4060 - o_nnz - array containing the number of nonzeros in the various rows of the 4061 OFF-DIAGONAL portion of the local submatrix (possibly different for 4062 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4063 structure. The size of this array is equal to the number 4064 of local rows, i.e 'm'. 4065 4066 Example Usage: 4067 Consider the following 8x8 matrix with 34 non-zero values, that is 4068 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4069 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4070 as follows 4071 4072 .vb 4073 1 2 0 | 0 3 0 | 0 4 4074 Proc0 0 5 6 | 7 0 0 | 8 0 4075 9 0 10 | 11 0 0 | 12 0 4076 ------------------------------------- 4077 13 0 14 | 15 16 17 | 0 0 4078 Proc1 0 18 0 | 19 20 21 | 0 0 4079 0 0 0 | 22 23 0 | 24 0 4080 ------------------------------------- 4081 Proc2 25 26 27 | 0 0 28 | 29 0 4082 30 0 0 | 31 32 33 | 0 34 4083 .ve 4084 4085 This can be represented as a collection of submatrices as 4086 .vb 4087 A B C 4088 D E F 4089 G H I 4090 .ve 4091 4092 Where the submatrices A,B,C are owned by proc0, D,E,F are 4093 owned by proc1, G,H,I are owned by proc2. 4094 4095 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4096 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4097 The 'M','N' parameters are 8,8, and have the same values on all procs. 4098 4099 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4100 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4101 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4102 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4103 part as `MATSEQAIJ` matrices. 
For example, proc1 will store [E] as a `MATSEQAIJ` 4104 matrix, ans [DF] as another `MATSEQAIJ` matrix. 4105 4106 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4107 allocated for every row of the local diagonal submatrix, and `o_nz` 4108 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4109 One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local 4110 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4111 In this case, the values of `d_nz`, `o_nz` are 4112 .vb 4113 proc0 dnz = 2, o_nz = 2 4114 proc1 dnz = 3, o_nz = 2 4115 proc2 dnz = 1, o_nz = 4 4116 .ve 4117 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4118 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4119 for proc3. i.e we are using 12+15+10=37 storage locations to store 4120 34 values. 4121 4122 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4123 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4124 In the above case the values for `d_nnz`, `o_nnz` are 4125 .vb 4126 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4127 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4128 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4129 .ve 4130 Here the space allocated is sum of all the above values i.e 34, and 4131 hence pre-allocation is perfect. 4132 4133 Level: intermediate 4134 4135 Notes: 4136 If the *_nnz parameter is given then the *_nz parameter is ignored 4137 4138 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4139 storage. The stored row and column indices begin with zero. 4140 See [Sparse Matrices](sec_matsparse) for details. 4141 4142 The parallel matrix is partitioned such that the first m0 rows belong to 4143 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4144 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
4145 4146 The DIAGONAL portion of the local submatrix of a processor can be defined 4147 as the submatrix which is obtained by extracting the part corresponding to 4148 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4149 first row that belongs to the processor, r2 is the last row belonging to 4150 this processor, and c1-c2 is range of indices of the local part of a 4151 vector suitable for applying the matrix to. This is an mxn matrix. In the 4152 common case of a square matrix, the row and column ranges are the same and 4153 the DIAGONAL part is also square. The remaining portion of the local 4154 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4155 4156 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4157 4158 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4159 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4160 You can also run with the option `-info` and look for messages with the string 4161 malloc in them to see if additional memory allocation was needed. 4162 4163 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4164 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4165 @*/ 4166 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4167 { 4168 PetscFunctionBegin; 4169 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4170 PetscValidType(B, 1); 4171 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4172 PetscFunctionReturn(PETSC_SUCCESS); 4173 } 4174 4175 /*@ 4176 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4177 CSR format for the local rows. 
4178 4179 Collective 4180 4181 Input Parameters: 4182 + comm - MPI communicator 4183 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4184 . n - This value should be the same as the local size used in creating the 4185 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4186 calculated if `N` is given) For square matrices n is almost always `m`. 4187 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4188 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4189 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4190 . j - global column indices 4191 - a - optional matrix values 4192 4193 Output Parameter: 4194 . mat - the matrix 4195 4196 Level: intermediate 4197 4198 Notes: 4199 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4200 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4201 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4202 4203 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4204 4205 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4206 4207 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4208 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 
4209 4210 The format which is used for the sparse matrix input, is equivalent to a 4211 row-major ordering, i.e., for the following matrix, the input data expected is 4212 as shown 4213 .vb 4214 1 0 0 4215 2 0 3 P0 4216 ------- 4217 4 5 6 P1 4218 4219 Process0 [P0] rows_owned=[0,1] 4220 i = {0,1,3} [size = nrow+1 = 2+1] 4221 j = {0,0,2} [size = 3] 4222 v = {1,2,3} [size = 3] 4223 4224 Process1 [P1] rows_owned=[2] 4225 i = {0,3} [size = nrow+1 = 1+1] 4226 j = {0,1,2} [size = 3] 4227 v = {4,5,6} [size = 3] 4228 .ve 4229 4230 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4231 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4232 @*/ 4233 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4234 { 4235 PetscFunctionBegin; 4236 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4237 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4238 PetscCall(MatCreate(comm, mat)); 4239 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4240 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4241 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4242 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4243 PetscFunctionReturn(PETSC_SUCCESS); 4244 } 4245 4246 /*@ 4247 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4248 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed 4249 from `MatCreateMPIAIJWithArrays()` 4250 4251 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4252 4253 Collective 4254 4255 Input Parameters: 4256 + mat - the matrix 4257 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4258 . n - This value should be the same as the local size used in creating the 4259 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4260 calculated if N is given) For square matrices n is almost always m. 4261 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4262 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4263 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4264 . J - column indices 4265 - v - matrix values 4266 4267 Level: deprecated 4268 4269 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4270 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4271 @*/ 4272 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4273 { 4274 PetscInt nnz, i; 4275 PetscBool nooffprocentries; 4276 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4277 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4278 PetscScalar *ad, *ao; 4279 PetscInt ldi, Iii, md; 4280 const PetscInt *Adi = Ad->i; 4281 PetscInt *ld = Aij->ld; 4282 4283 PetscFunctionBegin; 4284 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4285 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4286 PetscCheck(m == mat->rmap->n, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4287 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4288 4289 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4290 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4291 4292 for (i = 0; i < m; i++) { 4293 if (PetscDefined(USE_DEBUG)) { 4294 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4295 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4296 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4297 } 4298 } 4299 nnz = Ii[i + 1] - Ii[i]; 4300 Iii = Ii[i]; 4301 ldi = ld[i]; 4302 md = Adi[i + 1] - Adi[i]; 4303 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4304 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4305 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4306 ad += md; 4307 ao += nnz - md; 4308 } 4309 nooffprocentries = mat->nooffprocentries; 4310 mat->nooffprocentries = PETSC_TRUE; 4311 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4312 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4313 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4314 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4315 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4316 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4317 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4318 mat->nooffprocentries = nooffprocentries; 4319 PetscFunctionReturn(PETSC_SUCCESS); 4320 } 4321 4322 /*@ 4323 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the 
nonzero values 4324 4325 Collective 4326 4327 Input Parameters: 4328 + mat - the matrix 4329 - v - matrix values, stored by row 4330 4331 Level: intermediate 4332 4333 Notes: 4334 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4335 4336 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4337 4338 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4339 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4340 @*/ 4341 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4342 { 4343 PetscInt nnz, i, m; 4344 PetscBool nooffprocentries; 4345 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4346 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4347 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4348 PetscScalar *ad, *ao; 4349 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4350 PetscInt ldi, Iii, md; 4351 PetscInt *ld = Aij->ld; 4352 4353 PetscFunctionBegin; 4354 m = mat->rmap->n; 4355 4356 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4357 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4358 Iii = 0; 4359 for (i = 0; i < m; i++) { 4360 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4361 ldi = ld[i]; 4362 md = Adi[i + 1] - Adi[i]; 4363 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4364 ad += md; 4365 if (ao) { 4366 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4367 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4368 ao += nnz - md; 4369 } 4370 Iii += nnz; 4371 } 4372 nooffprocentries = mat->nooffprocentries; 4373 mat->nooffprocentries = PETSC_TRUE; 4374 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4375 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4376 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4377 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4378 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4379 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4380 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4381 mat->nooffprocentries = nooffprocentries; 4382 PetscFunctionReturn(PETSC_SUCCESS); 4383 } 4384 4385 /*@ 4386 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4387 (the default parallel PETSc format). For good matrix assembly performance 4388 the user should preallocate the matrix storage by setting the parameters 4389 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4390 4391 Collective 4392 4393 Input Parameters: 4394 + comm - MPI communicator 4395 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4396 This value should be the same as the local size used in creating the 4397 y vector for the matrix-vector product y = Ax. 4398 . n - This value should be the same as the local size used in creating the 4399 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4400 calculated if N is given) For square matrices n is almost always m. 4401 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4402 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4403 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4404 (same value is used for all local rows) 4405 . d_nnz - array containing the number of nonzeros in the various rows of the 4406 DIAGONAL portion of the local submatrix (possibly different for each row) 4407 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4408 The size of this array is equal to the number of local rows, i.e 'm'. 4409 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4410 submatrix (same value is used for all local rows). 
4411 - o_nnz - array containing the number of nonzeros in the various rows of the 4412 OFF-DIAGONAL portion of the local submatrix (possibly different for 4413 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4414 structure. The size of this array is equal to the number 4415 of local rows, i.e 'm'. 4416 4417 Output Parameter: 4418 . A - the matrix 4419 4420 Options Database Keys: 4421 + -mat_no_inode - Do not use inodes 4422 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4423 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4424 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4425 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4426 4427 Level: intermediate 4428 4429 Notes: 4430 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4431 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4432 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4433 4434 If the *_nnz parameter is given then the *_nz parameter is ignored 4435 4436 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4437 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4438 storage requirements for this matrix. 4439 4440 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4441 processor than it must be used on all processors that share the object for 4442 that argument. 4443 4444 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4445 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 
4446 4447 The user MUST specify either the local or global matrix dimensions 4448 (possibly both). 4449 4450 The parallel matrix is partitioned across processors such that the 4451 first `m0` rows belong to process 0, the next `m1` rows belong to 4452 process 1, the next `m2` rows belong to process 2, etc., where 4453 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4454 values corresponding to [m x N] submatrix. 4455 4456 The columns are logically partitioned with the n0 columns belonging 4457 to 0th partition, the next n1 columns belonging to the next 4458 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4459 4460 The DIAGONAL portion of the local submatrix on any given processor 4461 is the submatrix corresponding to the rows and columns m,n 4462 corresponding to the given processor. i.e diagonal matrix on 4463 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4464 etc. The remaining portion of the local submatrix [m x (N-n)] 4465 constitute the OFF-DIAGONAL portion. The example below better 4466 illustrates this concept. 4467 4468 For a square global matrix we define each processor's diagonal portion 4469 to be its local rows and the corresponding columns (a square submatrix); 4470 each processor's off-diagonal portion encompasses the remainder of the 4471 local matrix (a rectangular submatrix). 4472 4473 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4474 4475 When calling this routine with a single process communicator, a matrix of 4476 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4477 type of communicator, use the construction mechanism 4478 .vb 4479 MatCreate(..., &A); 4480 MatSetType(A, MATMPIAIJ); 4481 MatSetSizes(A, m, n, M, N); 4482 MatMPIAIJSetPreallocation(A, ...); 4483 .ve 4484 4485 By default, this format uses inodes (identical nodes) when possible. 
4486 We search for consecutive rows with the same nonzero structure, thereby 4487 reusing matrix information to achieve increased efficiency. 4488 4489 Example Usage: 4490 Consider the following 8x8 matrix with 34 non-zero values, that is 4491 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4492 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4493 as follows 4494 4495 .vb 4496 1 2 0 | 0 3 0 | 0 4 4497 Proc0 0 5 6 | 7 0 0 | 8 0 4498 9 0 10 | 11 0 0 | 12 0 4499 ------------------------------------- 4500 13 0 14 | 15 16 17 | 0 0 4501 Proc1 0 18 0 | 19 20 21 | 0 0 4502 0 0 0 | 22 23 0 | 24 0 4503 ------------------------------------- 4504 Proc2 25 26 27 | 0 0 28 | 29 0 4505 30 0 0 | 31 32 33 | 0 34 4506 .ve 4507 4508 This can be represented as a collection of submatrices as 4509 4510 .vb 4511 A B C 4512 D E F 4513 G H I 4514 .ve 4515 4516 Where the submatrices A,B,C are owned by proc0, D,E,F are 4517 owned by proc1, G,H,I are owned by proc2. 4518 4519 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4520 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4521 The 'M','N' parameters are 8,8, and have the same values on all procs. 4522 4523 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4524 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4525 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4526 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4527 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4528 matrix, and [DF] as another SeqAIJ matrix. 4529 4530 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4531 allocated for every row of the local diagonal submatrix, and `o_nz` 4532 storage locations are allocated for every row of the OFF-DIAGONAL submat. 
4533 One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local 4534 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4535 In this case, the values of `d_nz`,`o_nz` are 4536 .vb 4537 proc0 dnz = 2, o_nz = 2 4538 proc1 dnz = 3, o_nz = 2 4539 proc2 dnz = 1, o_nz = 4 4540 .ve 4541 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4542 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4543 for proc2. i.e we are using 12+15+10=37 storage locations to store 4544 34 values. 4545 4546 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4547 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4548 In the above case the values for d_nnz,o_nnz are 4549 .vb 4550 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4551 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4552 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4553 .ve 4554 Here the space allocated is sum of all the above values i.e 34, and 4555 hence pre-allocation is perfect. 
4556 4557 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4558 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4559 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4560 @*/ 4561 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4562 { 4563 PetscMPIInt size; 4564 4565 PetscFunctionBegin; 4566 PetscCall(MatCreate(comm, A)); 4567 PetscCall(MatSetSizes(*A, m, n, M, N)); 4568 PetscCallMPI(MPI_Comm_size(comm, &size)); 4569 if (size > 1) { /* parallel communicator: create a MATMPIAIJ and apply both diagonal and off-diagonal preallocation */ 4570 PetscCall(MatSetType(*A, MATMPIAIJ)); 4571 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4572 } else { /* single-rank communicator: a MATSEQAIJ is returned (see Notes above); o_nz/o_nnz are not used */ 4573 PetscCall(MatSetType(*A, MATSEQAIJ)); 4574 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4575 } 4576 PetscFunctionReturn(PETSC_SUCCESS); 4577 } 4578 4579 /*MC 4580 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4581 4582 Synopsis: 4583 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4584 4585 Not Collective 4586 4587 Input Parameter: 4588 . A - the `MATMPIAIJ` matrix 4589 4590 Output Parameters: 4591 + Ad - the diagonal portion of the matrix 4592 . Ao - the off-diagonal portion of the matrix 4593 . 
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4594 - ierr - error code 4595 4596 Level: advanced 4597 4598 Note: 4599 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4600 4601 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4602 M*/ 4603 4604 /*MC 4605 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4606 4607 Synopsis: 4608 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4609 4610 Not Collective 4611 4612 Input Parameters: 4613 + A - the `MATMPIAIJ` matrix 4614 . Ad - the diagonal portion of the matrix 4615 . Ao - the off-diagonal portion of the matrix 4616 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4617 - ierr - error code 4618 4619 Level: advanced 4620 4621 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4622 M*/ 4623 4624 /*@C 4625 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4626 4627 Not Collective 4628 4629 Input Parameter: 4630 . A - The `MATMPIAIJ` matrix 4631 4632 Output Parameters: 4633 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4634 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4635 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4636 4637 Level: intermediate 4638 4639 Note: 4640 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4641 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4642 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4643 local column numbers to global column numbers in the original matrix. 4644 4645 Fortran Notes: 4646 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4647 4648 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4649 @*/ 4650 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4651 { 4652 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4653 PetscBool flg; 4654 4655 PetscFunctionBegin; 4656 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4657 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4658 if (Ad) *Ad = a->A; 4659 if (Ao) *Ao = a->B; 4660 if (colmap) *colmap = a->garray; 4661 PetscFunctionReturn(PETSC_SUCCESS); 4662 } 4663 4664 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4665 { 4666 PetscInt m, N, i, rstart, nnz, Ii; 4667 PetscInt *indx; 4668 PetscScalar *values; 4669 MatType rootType; 4670 4671 PetscFunctionBegin; 4672 PetscCall(MatGetSize(inmat, &m, &N)); 4673 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4674 PetscInt *dnz, *onz, sum, bs, cbs; 4675 4676 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4677 /* Check sum(n) = N */ 4678 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4679 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4680 4681 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4682 rstart -= m; 4683 4684 MatPreallocateBegin(comm, m, n, dnz, onz); 4685 for (i = 0; i < m; i++) { 4686 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4687 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4688 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4689 } 4690 4691 PetscCall(MatCreate(comm, outmat)); 4692 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4693 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4694 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4695 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4696 PetscCall(MatSetType(*outmat, rootType)); 4697 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4698 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4699 MatPreallocateEnd(dnz, onz); 4700 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4701 } 4702 4703 /* numeric phase */ 4704 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4705 for (i = 0; i < m; i++) { 4706 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4707 Ii = i + rstart; 4708 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4709 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4710 } 4711 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4712 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4713 PetscFunctionReturn(PETSC_SUCCESS); 4714 } 4715 4716 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4717 { 4718 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4719 4720 PetscFunctionBegin; 4721 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4722 PetscCall(PetscFree(merge->id_r)); 4723 PetscCall(PetscFree(merge->len_s)); 4724 PetscCall(PetscFree(merge->len_r)); 4725 PetscCall(PetscFree(merge->bi)); 4726 PetscCall(PetscFree(merge->bj)); 4727 PetscCall(PetscFree(merge->buf_ri[0])); 4728 PetscCall(PetscFree(merge->buf_ri)); 4729 PetscCall(PetscFree(merge->buf_rj[0])); 4730 PetscCall(PetscFree(merge->buf_rj)); 4731 PetscCall(PetscFree(merge->coi)); 4732 PetscCall(PetscFree(merge->coj)); 4733 PetscCall(PetscFree(merge->owners_co)); 4734 
PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4735 PetscCall(PetscFree(merge)); 4736 PetscFunctionReturn(PETSC_SUCCESS); 4737 } 4738 4739 #include <../src/mat/utils/freespace.h> 4740 #include <petscbt.h> 4741 4742 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4743 { 4744 MPI_Comm comm; 4745 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4746 PetscMPIInt size, rank, taga, *len_s; 4747 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4748 PetscMPIInt proc, k; 4749 PetscInt **buf_ri, **buf_rj; 4750 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4751 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4752 MPI_Request *s_waits, *r_waits; 4753 MPI_Status *status; 4754 const MatScalar *aa, *a_a; 4755 MatScalar **abuf_r, *ba_i; 4756 Mat_Merge_SeqsToMPI *merge; 4757 PetscContainer container; 4758 4759 PetscFunctionBegin; 4760 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4761 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4762 4763 PetscCallMPI(MPI_Comm_size(comm, &size)); 4764 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4765 4766 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4767 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4768 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4769 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4770 aa = a_a; 4771 4772 bi = merge->bi; 4773 bj = merge->bj; 4774 buf_ri = merge->buf_ri; 4775 buf_rj = merge->buf_rj; 4776 4777 PetscCall(PetscMalloc1(size, &status)); 4778 owners = merge->rowmap->range; 4779 len_s = merge->len_s; 4780 4781 /* send and recv matrix values */ 4782 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4783 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4784 4785 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4786 for 
(proc = 0, k = 0; proc < size; proc++) { 4787 if (!len_s[proc]) continue; 4788 i = owners[proc]; 4789 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4790 k++; 4791 } 4792 4793 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4794 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4795 PetscCall(PetscFree(status)); 4796 4797 PetscCall(PetscFree(s_waits)); 4798 PetscCall(PetscFree(r_waits)); 4799 4800 /* insert mat values of mpimat */ 4801 PetscCall(PetscMalloc1(N, &ba_i)); 4802 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4803 4804 for (k = 0; k < merge->nrecv; k++) { 4805 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4806 nrows = *buf_ri_k[k]; 4807 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4808 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4809 } 4810 4811 /* set values of ba */ 4812 m = merge->rowmap->n; 4813 for (i = 0; i < m; i++) { 4814 arow = owners[rank] + i; 4815 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4816 bnzi = bi[i + 1] - bi[i]; 4817 PetscCall(PetscArrayzero(ba_i, bnzi)); 4818 4819 /* add local non-zero vals of this proc's seqmat into ba */ 4820 anzi = ai[arow + 1] - ai[arow]; 4821 aj = a->j + ai[arow]; 4822 aa = a_a + ai[arow]; 4823 nextaj = 0; 4824 for (j = 0; nextaj < anzi; j++) { 4825 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4826 ba_i[j] += aa[nextaj++]; 4827 } 4828 } 4829 4830 /* add received vals into ba */ 4831 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4832 /* i-th row */ 4833 if (i == *nextrow[k]) { 4834 anzi = *(nextai[k] + 1) - *nextai[k]; 4835 aj = buf_rj[k] + *nextai[k]; 4836 aa = abuf_r[k] + *nextai[k]; 4837 nextaj = 0; 4838 for (j = 0; nextaj < anzi; j++) { 4839 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4840 
ba_i[j] += aa[nextaj++]; 4841 } 4842 } 4843 nextrow[k]++; 4844 nextai[k]++; 4845 } 4846 } 4847 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4848 } 4849 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4850 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4851 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4852 4853 PetscCall(PetscFree(abuf_r[0])); 4854 PetscCall(PetscFree(abuf_r)); 4855 PetscCall(PetscFree(ba_i)); 4856 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4857 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4858 PetscFunctionReturn(PETSC_SUCCESS); 4859 } 4860 4861 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4862 { 4863 Mat B_mpi; 4864 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4865 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4866 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4867 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4868 PetscInt len, *dnz, *onz, bs, cbs; 4869 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4870 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4871 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4872 MPI_Status *status; 4873 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4874 PetscBT lnkbt; 4875 Mat_Merge_SeqsToMPI *merge; 4876 PetscContainer container; 4877 4878 PetscFunctionBegin; 4879 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4880 4881 /* make sure it is a PETSc comm */ 4882 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4883 PetscCallMPI(MPI_Comm_size(comm, &size)); 4884 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4885 4886 PetscCall(PetscNew(&merge)); 4887 PetscCall(PetscMalloc1(size, &status)); 4888 4889 /* determine row ownership */ 4890 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4891 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4892 
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4893 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4894 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4895 PetscCall(PetscMalloc1(size, &len_si)); 4896 PetscCall(PetscMalloc1(size, &merge->len_s)); 4897 4898 m = merge->rowmap->n; 4899 owners = merge->rowmap->range; 4900 4901 /* determine the number of messages to send, their lengths */ 4902 len_s = merge->len_s; 4903 4904 len = 0; /* length of buf_si[] */ 4905 merge->nsend = 0; 4906 for (PetscMPIInt proc = 0; proc < size; proc++) { 4907 len_si[proc] = 0; 4908 if (proc == rank) { 4909 len_s[proc] = 0; 4910 } else { 4911 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4912 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4913 } 4914 if (len_s[proc]) { 4915 merge->nsend++; 4916 nrows = 0; 4917 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4918 if (ai[i + 1] > ai[i]) nrows++; 4919 } 4920 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4921 len += len_si[proc]; 4922 } 4923 } 4924 4925 /* determine the number and length of messages to receive for ij-structure */ 4926 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4927 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4928 4929 /* post the Irecv of j-structure */ 4930 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4931 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4932 4933 /* post the Isend of j-structure */ 4934 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4935 4936 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4937 if (!len_s[proc]) continue; 4938 i = owners[proc]; 4939 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4940 k++; 4941 } 4942 4943 /* receives 
and sends of j-structure are complete */ 4944 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4945 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4946 4947 /* send and recv i-structure */ 4948 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4949 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4950 4951 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4952 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4953 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4954 if (!len_s[proc]) continue; 4955 /* form outgoing message for i-structure: 4956 buf_si[0]: nrows to be sent 4957 [1:nrows]: row index (global) 4958 [nrows+1:2*nrows+1]: i-structure index 4959 */ 4960 nrows = len_si[proc] / 2 - 1; 4961 buf_si_i = buf_si + nrows + 1; 4962 buf_si[0] = nrows; 4963 buf_si_i[0] = 0; 4964 nrows = 0; 4965 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4966 anzi = ai[i + 1] - ai[i]; 4967 if (anzi) { 4968 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4969 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4970 nrows++; 4971 } 4972 } 4973 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4974 k++; 4975 buf_si += len_si[proc]; 4976 } 4977 4978 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4979 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4980 4981 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4982 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4983 4984 PetscCall(PetscFree(len_si)); 4985 PetscCall(PetscFree(len_ri)); 4986 PetscCall(PetscFree(rj_waits)); 4987 PetscCall(PetscFree2(si_waits, sj_waits)); 4988 PetscCall(PetscFree(ri_waits)); 4989 PetscCall(PetscFree(buf_s)); 4990 
PetscCall(PetscFree(status)); 4991 4992 /* compute a local seq matrix in each processor */ 4993 /* allocate bi array and free space for accumulating nonzero column info */ 4994 PetscCall(PetscMalloc1(m + 1, &bi)); 4995 bi[0] = 0; 4996 4997 /* create and initialize a linked list */ 4998 nlnk = N + 1; 4999 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 5000 5001 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5002 len = ai[owners[rank + 1]] - ai[owners[rank]]; 5003 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 5004 5005 current_space = free_space; 5006 5007 /* determine symbolic info for each local row */ 5008 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5009 5010 for (k = 0; k < merge->nrecv; k++) { 5011 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5012 nrows = *buf_ri_k[k]; 5013 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5014 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5015 } 5016 5017 MatPreallocateBegin(comm, m, n, dnz, onz); 5018 len = 0; 5019 for (i = 0; i < m; i++) { 5020 bnzi = 0; 5021 /* add local non-zero cols of this proc's seqmat into lnk */ 5022 arow = owners[rank] + i; 5023 anzi = ai[arow + 1] - ai[arow]; 5024 aj = a->j + ai[arow]; 5025 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5026 bnzi += nlnk; 5027 /* add received col data into lnk */ 5028 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5029 if (i == *nextrow[k]) { /* i-th row */ 5030 anzi = *(nextai[k] + 1) - *nextai[k]; 5031 aj = buf_rj[k] + *nextai[k]; 5032 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5033 bnzi += nlnk; 5034 nextrow[k]++; 5035 nextai[k]++; 5036 } 5037 } 5038 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5039 5040 /* if free space is not available, make more free space */ 5041 if (current_space->local_remaining < bnzi) 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5042 /* copy data into free space, then initialize lnk */ 5043 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5044 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5045 5046 current_space->array += bnzi; 5047 current_space->local_used += bnzi; 5048 current_space->local_remaining -= bnzi; 5049 5050 bi[i + 1] = bi[i] + bnzi; 5051 } 5052 5053 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5054 5055 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5056 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5057 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5058 5059 /* create symbolic parallel matrix B_mpi */ 5060 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5061 PetscCall(MatCreate(comm, &B_mpi)); 5062 if (n == PETSC_DECIDE) { 5063 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5064 } else { 5065 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5066 } 5067 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5068 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5069 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5070 MatPreallocateEnd(dnz, onz); 5071 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5072 5073 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5074 B_mpi->assembled = PETSC_FALSE; 5075 merge->bi = bi; 5076 merge->bj = bj; 5077 merge->buf_ri = buf_ri; 5078 merge->buf_rj = buf_rj; 5079 merge->coi = NULL; 5080 merge->coj = NULL; 5081 merge->owners_co = NULL; 5082 5083 PetscCall(PetscCommDestroy(&comm)); 5084 5085 /* attach the supporting struct to B_mpi for reuse */ 5086 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5087 PetscCall(PetscContainerSetPointer(container, merge)); 5088 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5089 
PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5090 PetscCall(PetscContainerDestroy(&container)); 5091 *mpimat = B_mpi; 5092 5093 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5094 PetscFunctionReturn(PETSC_SUCCESS); 5095 } 5096 5097 /*@ 5098 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5099 matrices from each processor 5100 5101 Collective 5102 5103 Input Parameters: 5104 + comm - the communicators the parallel matrix will live on 5105 . seqmat - the input sequential matrices 5106 . m - number of local rows (or `PETSC_DECIDE`) 5107 . n - number of local columns (or `PETSC_DECIDE`) 5108 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5109 5110 Output Parameter: 5111 . mpimat - the parallel matrix generated 5112 5113 Level: advanced 5114 5115 Note: 5116 The dimensions of the sequential matrix in each processor MUST be the same. 5117 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5118 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 
5119 5120 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5121 @*/ 5122 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5123 { 5124 PetscMPIInt size; 5125 5126 PetscFunctionBegin; 5127 PetscCallMPI(MPI_Comm_size(comm, &size)); 5128 if (size == 1) { 5129 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5130 if (scall == MAT_INITIAL_MATRIX) { 5131 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5132 } else { 5133 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5134 } 5135 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5136 PetscFunctionReturn(PETSC_SUCCESS); 5137 } 5138 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5139 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5140 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5141 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5142 PetscFunctionReturn(PETSC_SUCCESS); 5143 } 5144 5145 /*@ 5146 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5147 5148 Not Collective 5149 5150 Input Parameter: 5151 . A - the matrix 5152 5153 Output Parameter: 5154 . A_loc - the local sequential matrix generated 5155 5156 Level: developer 5157 5158 Notes: 5159 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5160 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5161 `n` is the global column count obtained with `MatGetSize()` 5162 5163 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5164 5165 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
5166 5167 Destroy the matrix with `MatDestroy()` 5168 5169 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5170 @*/ 5171 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5172 { 5173 PetscBool mpi; 5174 5175 PetscFunctionBegin; 5176 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5177 if (mpi) { 5178 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5179 } else { 5180 *A_loc = A; 5181 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5182 } 5183 PetscFunctionReturn(PETSC_SUCCESS); 5184 } 5185 5186 /*@ 5187 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5188 5189 Not Collective 5190 5191 Input Parameters: 5192 + A - the matrix 5193 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5194 5195 Output Parameter: 5196 . A_loc - the local sequential matrix generated 5197 5198 Level: developer 5199 5200 Notes: 5201 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5202 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5203 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5204 5205 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5206 5207 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5208 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5209 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5210 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* NOTE(review): for size==1 any scall other than INITIAL/REUSE silently returns
       without touching *A_loc — confirm this fall-through is intended */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are walking copies advanced in the loops below; aav/bav are kept for the restores */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    /* row i of the local matrix holds all diagonal-block plus all off-diagonal-block entries */
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A: entries whose global column is < cstart come first
         (assumes garray maps to ascending global columns — TODO confirm) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining entries with global column >= cstart */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists: overwrite values only, in the same interleaved order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by
taking all its local rows and putting them into a sequential matrix with 5321 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5322 5323 Not Collective 5324 5325 Input Parameters: 5326 + A - the matrix 5327 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5328 5329 Output Parameters: 5330 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5331 - A_loc - the local sequential matrix generated 5332 5333 Level: developer 5334 5335 Note: 5336 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5337 part, then those associated with the off-diagonal part (in its local ordering) 5338 5339 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5340 @*/ 5341 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5342 { 5343 Mat Ao, Ad; 5344 const PetscInt *cmap; 5345 PetscMPIInt size; 5346 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5347 5348 PetscFunctionBegin; 5349 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5350 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5351 if (size == 1) { 5352 if (scall == MAT_INITIAL_MATRIX) { 5353 PetscCall(PetscObjectReference((PetscObject)Ad)); 5354 *A_loc = Ad; 5355 } else if (scall == MAT_REUSE_MATRIX) { 5356 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5357 } 5358 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5359 PetscFunctionReturn(PETSC_SUCCESS); 5360 } 5361 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5362 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5363 if (f) { 5364 PetscCall((*f)(A, scall, glob, A_loc)); 5365 } else { 5366 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)Ad->data; 5367 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5368 Mat_SeqAIJ *c; 5369 PetscInt *ai = a->i, *aj = a->j; 5370 PetscInt *bi = b->i, *bj = b->j; 5371 PetscInt *ci, *cj; 5372 const PetscScalar *aa, *ba; 5373 PetscScalar *ca; 5374 PetscInt i, j, am, dn, on; 5375 5376 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5377 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5378 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5379 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5380 if (scall == MAT_INITIAL_MATRIX) { 5381 PetscInt k; 5382 PetscCall(PetscMalloc1(1 + am, &ci)); 5383 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5384 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5385 ci[0] = 0; 5386 for (i = 0, k = 0; i < am; i++) { 5387 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5388 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5389 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5390 /* diagonal portion of A */ 5391 for (j = 0; j < ncols_d; j++, k++) { 5392 cj[k] = *aj++; 5393 ca[k] = *aa++; 5394 } 5395 /* off-diagonal portion of A */ 5396 for (j = 0; j < ncols_o; j++, k++) { 5397 cj[k] = dn + *bj++; 5398 ca[k] = *ba++; 5399 } 5400 } 5401 /* put together the new matrix */ 5402 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5403 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5404 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5405 c = (Mat_SeqAIJ *)(*A_loc)->data; 5406 c->free_a = PETSC_TRUE; 5407 c->free_ij = PETSC_TRUE; 5408 c->nonew = 0; 5409 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5410 } else if (scall == MAT_REUSE_MATRIX) { 5411 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5412 for (i = 0; i < am; i++) { 5413 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5414 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5415 /* diagonal portion of A */ 5416 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5417 /* off-diagonal portion of A */ 5418 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5419 } 5420 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5421 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5422 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5423 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5424 if (glob) { 5425 PetscInt cst, *gidx; 5426 5427 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5428 PetscCall(PetscMalloc1(dn + on, &gidx)); 5429 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5430 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5431 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5432 } 5433 } 5434 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5435 PetscFunctionReturn(PETSC_SUCCESS); 5436 } 5437 5438 /*@C 5439 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5440 5441 Not Collective 5442 5443 Input Parameters: 5444 + A - the matrix 5445 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5446 . row - index set of rows to extract (or `NULL`) 5447 - col - index set of columns to extract (or `NULL`) 5448 5449 Output Parameter: 5450 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: every column with a local nonzero, in ascending global order:
       off-diagonal columns below cstart, then the owned block, then the rest of garray */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first off-diagonal column with global index >= cstart */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    /* the IS takes ownership of idx (PETSC_OWN_POINTER) */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  /* one SF leaf per requested row, rooted at the owning process's local row */
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  /* each record is an (ndiag, noffdiag) pair, hence MPIU_2INT per graph edge */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* build two entry-level SFs: one moving the diagonal-block entries, one the off-diagonal */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix */
  /* pd->j is shifted to global numbering in place so leaves receive global indices;
     the shift is undone after the broadcast completes (see the -= pcstart loop below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j likewise mapped to global in place, and mapped back with
     ISGlobalToLocalMappingApply once the osf broadcast has finished */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dof consecutive columns collapse to one key */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5723 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5724 PetscCall(PetscCalloc1(htsize, &rowindices)); 5725 off = 0; 5726 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5727 PetscCall(PetscHMapIDestroy(&hamp)); 5728 PetscCall(PetscSortInt(htsize, rowindices)); 5729 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5730 /* In case, the matrix was already created but users want to recreate the matrix */ 5731 PetscCall(MatDestroy(P_oth)); 5732 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5733 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5734 PetscCall(ISDestroy(&map)); 5735 PetscCall(ISDestroy(&rows)); 5736 } else if (reuse == MAT_REUSE_MATRIX) { 5737 /* If matrix was already created, we simply update values using SF objects 5738 * that as attached to the matrix earlier. 
5739 */ 5740 const PetscScalar *pd_a, *po_a; 5741 5742 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5743 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5744 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5745 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5746 /* Update values in place */ 5747 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5748 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5749 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5750 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5751 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5752 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5753 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5754 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5755 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5756 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5757 PetscFunctionReturn(PETSC_SUCCESS); 5758 } 5759 5760 /*@C 5761 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5762 5763 Collective 5764 5765 Input Parameters: 5766 + A - the first matrix in `MATMPIAIJ` format 5767 . B - the second matrix in `MATMPIAIJ` format 5768 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5769 5770 Output Parameters: 5771 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5772 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5773 - B_seq - the sequential matrix generated 5774 5775 Level: developer 5776 5777 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5778 @*/ 5779 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5780 { 5781 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5782 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5783 IS isrowb, iscolb; 5784 Mat *bseq = NULL; 5785 5786 PetscFunctionBegin; 5787 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5788 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5789 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5790 5791 if (scall == MAT_INITIAL_MATRIX) { 5792 start = A->cmap->rstart; 5793 cmap = a->garray; 5794 nzA = a->A->cmap->n; 5795 nzB = a->B->cmap->n; 5796 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5797 ncols = 0; 5798 for (i = 0; i < nzB; i++) { /* row < local row index */ 5799 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5800 else break; 5801 } 5802 imark = i; 5803 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5804 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5805 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5806 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5807 } else { 5808 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5809 isrowb = *rowb; 5810 iscolb = *colb; 5811 PetscCall(PetscMalloc1(1, &bseq)); 5812 bseq[0] = *B_seq; 5813 } 5814 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5815 *B_seq = bseq[0]; 5816 PetscCall(PetscFree(bseq)); 5817 if (!rowb) { 5818 
PetscCall(ISDestroy(&isrowb)); 5819 } else { 5820 *rowb = isrowb; 5821 } 5822 if (!colb) { 5823 PetscCall(ISDestroy(&iscolb)); 5824 } else { 5825 *colb = iscolb; 5826 } 5827 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5828 PetscFunctionReturn(PETSC_SUCCESS); 5829 } 5830 5831 /* 5832 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5833 of the OFF-DIAGONAL portion of local A 5834 5835 Collective 5836 5837 Input Parameters: 5838 + A,B - the matrices in `MATMPIAIJ` format 5839 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5840 5841 Output Parameter: 5842 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5843 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5844 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5845 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5846 5847 Developer Note: 5848 This directly accesses information inside the VecScatter associated with the matrix-vector product 5849 for this matrix. This is not desirable.. 
5850 5851 Level: developer 5852 5853 */ 5854 5855 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5856 { 5857 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5858 VecScatter ctx; 5859 MPI_Comm comm; 5860 const PetscMPIInt *rprocs, *sprocs; 5861 PetscMPIInt nrecvs, nsends; 5862 const PetscInt *srow, *rstarts, *sstarts; 5863 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5864 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5865 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5866 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5867 PetscMPIInt size, tag, rank, nreqs; 5868 5869 PetscFunctionBegin; 5870 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5871 PetscCallMPI(MPI_Comm_size(comm, &size)); 5872 5873 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5874 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5875 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5876 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5877 5878 if (size == 1) { 5879 startsj_s = NULL; 5880 bufa_ptr = NULL; 5881 *B_oth = NULL; 5882 PetscFunctionReturn(PETSC_SUCCESS); 5883 } 5884 5885 ctx = a->Mvctx; 5886 tag = ((PetscObject)ctx)->tag; 5887 5888 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5889 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5890 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5891 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5892 PetscCall(PetscMalloc1(nreqs, &reqs)); 5893 rwaits = reqs; 5894 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5895 5896 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5897 if (scall == MAT_INITIAL_MATRIX) { 5898 /* i-array */ 5899 /* post receives */ 5900 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5901 for (i = 0; i < nrecvs; i++) { 5902 rowlen = rvalues + rstarts[i] * rbs; 5903 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5904 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5905 } 5906 5907 /* pack the outgoing message */ 5908 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5909 5910 sstartsj[0] = 0; 5911 rstartsj[0] = 0; 5912 len = 0; /* total length of j or a array to be sent */ 5913 if (nsends) { 5914 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5915 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5916 } 5917 for (i = 0; i < nsends; i++) { 5918 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5919 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5920 for (j = 0; j < nrows; j++) { 5921 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5922 for (l = 0; l < sbs; l++) { 5923 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5924 5925 rowlen[j * sbs + l] = ncols; 5926 5927 len += ncols; 5928 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5929 } 5930 k++; 5931 } 5932 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5933 5934 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5935 } 5936 /* recvs and sends of i-array are completed */ 5937 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5938 
PetscCall(PetscFree(svalues)); 5939 5940 /* allocate buffers for sending j and a arrays */ 5941 PetscCall(PetscMalloc1(len + 1, &bufj)); 5942 PetscCall(PetscMalloc1(len + 1, &bufa)); 5943 5944 /* create i-array of B_oth */ 5945 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5946 5947 b_othi[0] = 0; 5948 len = 0; /* total length of j or a array to be received */ 5949 k = 0; 5950 for (i = 0; i < nrecvs; i++) { 5951 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5952 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5953 for (j = 0; j < nrows; j++) { 5954 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5955 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5956 k++; 5957 } 5958 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5959 } 5960 PetscCall(PetscFree(rvalues)); 5961 5962 /* allocate space for j and a arrays of B_oth */ 5963 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5964 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5965 5966 /* j-array */ 5967 /* post receives of j-array */ 5968 for (i = 0; i < nrecvs; i++) { 5969 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5970 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5971 } 5972 5973 /* pack the outgoing message j-array */ 5974 if (nsends) k = sstarts[0]; 5975 for (i = 0; i < nsends; i++) { 5976 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5977 bufJ = bufj + sstartsj[i]; 5978 for (j = 0; j < nrows; j++) { 5979 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5980 for (ll = 0; ll < sbs; ll++) { 5981 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5982 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5983 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5984 } 5985 } 5986 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5987 } 5988 5989 /* 
recvs and sends of j-array are completed */ 5990 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5991 } else if (scall == MAT_REUSE_MATRIX) { 5992 sstartsj = *startsj_s; 5993 rstartsj = *startsj_r; 5994 bufa = *bufa_ptr; 5995 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5996 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5997 5998 /* a-array */ 5999 /* post receives of a-array */ 6000 for (i = 0; i < nrecvs; i++) { 6001 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 6002 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 6003 } 6004 6005 /* pack the outgoing message a-array */ 6006 if (nsends) k = sstarts[0]; 6007 for (i = 0; i < nsends; i++) { 6008 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6009 bufA = bufa + sstartsj[i]; 6010 for (j = 0; j < nrows; j++) { 6011 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6012 for (ll = 0; ll < sbs; ll++) { 6013 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6014 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6015 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6016 } 6017 } 6018 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6019 } 6020 /* recvs and sends of a-array are completed */ 6021 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6022 PetscCall(PetscFree(reqs)); 6023 6024 if (scall == MAT_INITIAL_MATRIX) { 6025 Mat_SeqAIJ *b_oth; 6026 6027 /* put together the new matrix */ 6028 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6029 6030 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6031 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6032 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6033 b_oth->free_a = PETSC_TRUE; 6034 b_oth->free_ij = PETSC_TRUE; 6035 b_oth->nonew = 0; 6036 6037 PetscCall(PetscFree(bufj)); 6038 if (!startsj_s || !bufa_ptr) { 6039 PetscCall(PetscFree2(sstartsj, rstartsj)); 6040 PetscCall(PetscFree(bufa_ptr)); 6041 } else { 6042 *startsj_s = sstartsj; 6043 *startsj_r = rstartsj; 6044 *bufa_ptr = bufa; 6045 } 6046 } else if (scall == MAT_REUSE_MATRIX) { 6047 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6048 } 6049 6050 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6051 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6052 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6053 PetscFunctionReturn(PETSC_SUCCESS); 6054 } 6055 6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6059 #if defined(PETSC_HAVE_MKL_SPARSE) 6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6061 #endif 6062 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6064 #if defined(PETSC_HAVE_ELEMENTAL) 6065 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6066 #endif 6067 #if defined(PETSC_HAVE_SCALAPACK) 6068 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6069 #endif 6070 #if defined(PETSC_HAVE_HYPRE) 6071 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6072 #endif 6073 #if defined(PETSC_HAVE_CUDA) 6074 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6075 #endif 6076 #if defined(PETSC_HAVE_HIP) 6077 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6078 #endif 6079 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6080 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6081 #endif 6082 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6083 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6084 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6085 6086 /* 6087 Computes (B'*A')' since computing B*A directly is untenable 6088 6089 n p p 6090 [ ] [ ] [ ] 6091 m [ A ] * n [ B ] = m [ C ] 6092 [ ] [ ] [ ] 6093 6094 */ 6095 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6096 { 6097 Mat At, Bt, Ct; 6098 6099 PetscFunctionBegin; 6100 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6101 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6102 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6103 PetscCall(MatDestroy(&At)); 6104 PetscCall(MatDestroy(&Bt)); 6105 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6106 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6107 PetscCall(MatDestroy(&Ct)); 6108 PetscFunctionReturn(PETSC_SUCCESS); 6109 } 6110 6111 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6112 { 6113 PetscBool cisdense; 6114 6115 PetscFunctionBegin; 6116 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6117 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6118 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6119 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6120 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6121 
PetscCall(MatSetUp(C)); 6122 6123 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6124 PetscFunctionReturn(PETSC_SUCCESS); 6125 } 6126 6127 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6128 { 6129 Mat_Product *product = C->product; 6130 Mat A = product->A, B = product->B; 6131 6132 PetscFunctionBegin; 6133 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6134 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6135 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6136 C->ops->productsymbolic = MatProductSymbolic_AB; 6137 PetscFunctionReturn(PETSC_SUCCESS); 6138 } 6139 6140 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6141 { 6142 Mat_Product *product = C->product; 6143 6144 PetscFunctionBegin; 6145 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6146 PetscFunctionReturn(PETSC_SUCCESS); 6147 } 6148 6149 /* 6150 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6151 6152 Input Parameters: 6153 6154 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6155 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6156 6157 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6158 6159 For Set1, j1[] contains column indices of the nonzeros. 6160 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6161 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6162 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6163 6164 Similar for Set2. 

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way merge of the sorted (with repeats) column lists of row r; b1/b2 always
       sit on the FIRST copy of the current unique entry and jump over its repeats via jmap */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    /* Row pointer of the merged CSR; PetscIntCast guards against PetscCount->PetscInt overflow */
    PetscCall(PetscIntCast(t, i + r + 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

    i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
    i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

    Atot: number of entries belonging to the diagonal block
    Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
      /* NOTE(review): valid global columns are 0..cmap->N-1, so this bound looks like it should be
         j[p] < mat->cmap->N rather than <= — verify against the column layout before changing */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* counters are reused as running offsets here */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz:  number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p >
imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6406 } 6407 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6408 PetscFunctionReturn(PETSC_SUCCESS); 6409 } 6410 6411 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6412 { 6413 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6414 6415 PetscFunctionBegin; 6416 PetscCall(PetscSFDestroy(&coo->sf)); 6417 PetscCall(PetscFree(coo->Aperm1)); 6418 PetscCall(PetscFree(coo->Bperm1)); 6419 PetscCall(PetscFree(coo->Ajmap1)); 6420 PetscCall(PetscFree(coo->Bjmap1)); 6421 PetscCall(PetscFree(coo->Aimap2)); 6422 PetscCall(PetscFree(coo->Bimap2)); 6423 PetscCall(PetscFree(coo->Aperm2)); 6424 PetscCall(PetscFree(coo->Bperm2)); 6425 PetscCall(PetscFree(coo->Ajmap2)); 6426 PetscCall(PetscFree(coo->Bjmap2)); 6427 PetscCall(PetscFree(coo->Cperm1)); 6428 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6429 PetscCall(PetscFree(coo)); 6430 PetscFunctionReturn(PETSC_SUCCESS); 6431 } 6432 6433 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6434 { 6435 MPI_Comm comm; 6436 PetscMPIInt rank, size; 6437 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6438 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6439 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6440 PetscContainer container; 6441 MatCOOStruct_MPIAIJ *coo; 6442 6443 PetscFunctionBegin; 6444 PetscCall(PetscFree(mpiaij->garray)); 6445 PetscCall(VecDestroy(&mpiaij->lvec)); 6446 #if defined(PETSC_USE_CTABLE) 6447 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6448 #else 6449 PetscCall(PetscFree(mpiaij->colmap)); 6450 #endif 6451 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6452 mat->assembled = PETSC_FALSE; 6453 mat->was_assembled = PETSC_FALSE; 6454 6455 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6456 PetscCallMPI(MPI_Comm_size(comm, &size)); 6457 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6458 
PetscCall(PetscLayoutSetUp(mat->rmap)); 6459 PetscCall(PetscLayoutSetUp(mat->cmap)); 6460 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6461 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6462 PetscCall(MatGetLocalSize(mat, &m, &n)); 6463 PetscCall(MatGetSize(mat, &M, &N)); 6464 6465 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6466 /* entries come first, then local rows, then remote rows. */ 6467 PetscCount n1 = coo_n, *perm1; 6468 PetscInt *i1 = coo_i, *j1 = coo_j; 6469 6470 PetscCall(PetscMalloc1(n1, &perm1)); 6471 for (k = 0; k < n1; k++) perm1[k] = k; 6472 6473 /* Manipulate indices so that entries with negative row or col indices will have smallest 6474 row indices, local entries will have greater but negative row indices, and remote entries 6475 will have positive row indices. 6476 */ 6477 for (k = 0; k < n1; k++) { 6478 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6479 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6480 else { 6481 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6482 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6483 } 6484 } 6485 6486 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6487 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6488 6489 /* Advance k to the first entry we need to take care of */ 6490 for (k = 0; k < n1; k++) 6491 if (i1[k] > PETSC_INT_MIN) break; 6492 PetscCount i1start = k; 6493 6494 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6495 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of 
local rows*/ 6496 6497 /* Send remote rows to their owner */ 6498 /* Find which rows should be sent to which remote ranks*/ 6499 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6500 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6501 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6502 const PetscInt *ranges; 6503 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6504 6505 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6506 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6507 for (k = rem; k < n1;) { 6508 PetscMPIInt owner; 6509 PetscInt firstRow, lastRow; 6510 6511 /* Locate a row range */ 6512 firstRow = i1[k]; /* first row of this owner */ 6513 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6514 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6515 6516 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6517 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6518 6519 /* All entries in [k,p) belong to this remote owner */ 6520 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6521 PetscMPIInt *sendto2; 6522 PetscInt *nentries2; 6523 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6524 6525 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6526 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6527 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6528 PetscCall(PetscFree2(sendto, nentries2)); 6529 sendto = sendto2; 6530 nentries = nentries2; 6531 maxNsend = maxNsend2; 6532 } 6533 sendto[nsend] = owner; 6534 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6535 nsend++; 6536 k = p; 6537 } 6538 6539 /* Build 1st SF to know offsets on remote to send data */ 6540 PetscSF sf1; 6541 PetscInt nroots = 1, nroots2 = 0; 6542 PetscInt nleaves = nsend, nleaves2 = 0; 6543 PetscInt *offsets; 6544 PetscSFNode *iremote; 6545 6546 PetscCall(PetscSFCreate(comm, &sf1)); 6547 PetscCall(PetscMalloc1(nsend, &iremote)); 6548 PetscCall(PetscMalloc1(nsend, &offsets)); 6549 for (k = 0; k < nsend; k++) { 6550 iremote[k].rank = sendto[k]; 6551 iremote[k].index = 0; 6552 nleaves2 += nentries[k]; 6553 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6554 } 6555 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6556 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6557 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6558 PetscCall(PetscSFDestroy(&sf1)); 6559 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6560 6561 /* Build 2nd SF to send remote COOs to their owner */ 6562 PetscSF sf2; 6563 nroots = nroots2; 6564 nleaves = nleaves2; 6565 PetscCall(PetscSFCreate(comm, &sf2)); 6566 PetscCall(PetscSFSetFromOptions(sf2)); 6567 
PetscCall(PetscMalloc1(nleaves, &iremote)); 6568 p = 0; 6569 for (k = 0; k < nsend; k++) { 6570 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6571 for (q = 0; q < nentries[k]; q++, p++) { 6572 iremote[p].rank = sendto[k]; 6573 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6574 } 6575 } 6576 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6577 6578 /* Send the remote COOs to their owner */ 6579 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6580 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6581 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6582 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6583 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6584 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6585 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6586 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6587 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6588 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6589 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6590 6591 PetscCall(PetscFree(offsets)); 6592 PetscCall(PetscFree2(sendto, nentries)); 6593 6594 /* Sort received COOs by row along with the permutation array */ 6595 for (k = 0; k < n2; k++) perm2[k] = k; 6596 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6597 6598 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6599 PetscCount *Cperm1; 6600 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6601 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6602 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6603 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6604 6605 /* Support for HYPRE matrices, kind of a hack. 6606 Swap min column with diagonal so that diagonal values will go first */ 6607 PetscBool hypre; 6608 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6609 if (hypre) { 6610 PetscInt *minj; 6611 PetscBT hasdiag; 6612 6613 PetscCall(PetscBTCreate(m, &hasdiag)); 6614 PetscCall(PetscMalloc1(m, &minj)); 6615 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6616 for (k = i1start; k < rem; k++) { 6617 if (j1[k] < cstart || j1[k] >= cend) continue; 6618 const PetscInt rindex = i1[k] - rstart; 6619 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6620 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6621 } 6622 for (k = 0; k < n2; k++) { 6623 if (j2[k] < cstart || j2[k] >= cend) continue; 6624 const PetscInt rindex = i2[k] - rstart; 6625 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6626 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6627 } 6628 for (k = i1start; k < rem; k++) { 6629 const PetscInt rindex = i1[k] - rstart; 6630 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6631 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6632 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6633 } 6634 for (k = 0; k < n2; k++) { 6635 const PetscInt rindex = i2[k] - rstart; 6636 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6637 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6638 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6639 } 6640 
PetscCall(PetscBTDestroy(&hasdiag)); 6641 PetscCall(PetscFree(minj)); 6642 } 6643 6644 /* Split local COOs and received COOs into diag/offdiag portions */ 6645 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6646 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6647 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6648 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6649 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6650 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6651 6652 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6653 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6654 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6655 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6656 6657 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6658 PetscInt *Ai, *Bi; 6659 PetscInt *Aj, *Bj; 6660 6661 PetscCall(PetscMalloc1(m + 1, &Ai)); 6662 PetscCall(PetscMalloc1(m + 1, &Bi)); 6663 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6664 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6665 6666 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6667 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6668 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6669 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6670 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6671 6672 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6673 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6674 6675 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6676 /* expect nonzeros in A/B most likely have local contributing 
entries */ 6677 PetscInt Annz = Ai[m]; 6678 PetscInt Bnnz = Bi[m]; 6679 PetscCount *Ajmap1_new, *Bjmap1_new; 6680 6681 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6682 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6683 6684 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6685 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6686 6687 PetscCall(PetscFree(Aimap1)); 6688 PetscCall(PetscFree(Ajmap1)); 6689 PetscCall(PetscFree(Bimap1)); 6690 PetscCall(PetscFree(Bjmap1)); 6691 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6692 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6693 PetscCall(PetscFree(perm1)); 6694 PetscCall(PetscFree3(i2, j2, perm2)); 6695 6696 Ajmap1 = Ajmap1_new; 6697 Bjmap1 = Bjmap1_new; 6698 6699 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6700 if (Annz < Annz1 + Annz2) { 6701 PetscInt *Aj_new; 6702 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6703 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6704 PetscCall(PetscFree(Aj)); 6705 Aj = Aj_new; 6706 } 6707 6708 if (Bnnz < Bnnz1 + Bnnz2) { 6709 PetscInt *Bj_new; 6710 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6711 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6712 PetscCall(PetscFree(Bj)); 6713 Bj = Bj_new; 6714 } 6715 6716 /* Create new submatrices for on-process and off-process coupling */ 6717 PetscScalar *Aa, *Ba; 6718 MatType rtype; 6719 Mat_SeqAIJ *a, *b; 6720 PetscObjectState state; 6721 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6722 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6723 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6724 if (cstart) { 6725 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6726 } 6727 6728 PetscCall(MatGetRootType_Private(mat, &rtype)); 6729 6730 MatSeqXAIJGetOptions_Private(mpiaij->A); 6731 PetscCall(MatDestroy(&mpiaij->A)); 6732 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6733 
PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6734 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6735 6736 MatSeqXAIJGetOptions_Private(mpiaij->B); 6737 PetscCall(MatDestroy(&mpiaij->B)); 6738 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6739 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6740 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6741 6742 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6743 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6744 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6745 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6746 6747 a = (Mat_SeqAIJ *)mpiaij->A->data; 6748 b = (Mat_SeqAIJ *)mpiaij->B->data; 6749 a->free_a = PETSC_TRUE; 6750 a->free_ij = PETSC_TRUE; 6751 b->free_a = PETSC_TRUE; 6752 b->free_ij = PETSC_TRUE; 6753 a->maxnz = a->nz; 6754 b->maxnz = b->nz; 6755 6756 /* conversion must happen AFTER multiply setup */ 6757 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6758 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6759 PetscCall(VecDestroy(&mpiaij->lvec)); 6760 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6761 6762 // Put the COO struct in a container and then attach that to the matrix 6763 PetscCall(PetscMalloc1(1, &coo)); 6764 coo->n = coo_n; 6765 coo->sf = sf2; 6766 coo->sendlen = nleaves; 6767 coo->recvlen = nroots; 6768 coo->Annz = Annz; 6769 coo->Bnnz = Bnnz; 6770 coo->Annz2 = Annz2; 6771 coo->Bnnz2 = Bnnz2; 6772 coo->Atot1 = Atot1; 6773 coo->Atot2 = Atot2; 6774 coo->Btot1 = Btot1; 6775 coo->Btot2 = Btot2; 6776 coo->Ajmap1 = Ajmap1; 6777 coo->Aperm1 = Aperm1; 6778 coo->Bjmap1 = Bjmap1; 6779 coo->Bperm1 = Bperm1; 6780 coo->Aimap2 = Aimap2; 6781 coo->Ajmap2 = Ajmap2; 6782 coo->Aperm2 = Aperm2; 6783 coo->Bimap2 = Bimap2; 6784 
  coo->Bjmap2 = Bjmap2;
  coo->Bperm2 = Bperm2;
  coo->Cperm1 = Cperm1;
  // Allocate in preallocation. If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
  /* Stash the COO struct on the matrix so MatSetValuesCOO_MPIAIJ() can retrieve it by name */
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert the numerical values v[] -- given in the original COO ordering passed to MatSetPreallocationCOO() --
   into the diagonal (A) and off-diagonal (B) blocks, using the maps/permutations stored in the composed
   MatCOOStruct_MPIAIJ container. Off-process entries are packed via Cperm1, sent to their owners with the
   container's PetscSF, and that communication is overlapped with the insertion of local entries. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2/Bimap2 map each received (row,col) to its nonzero slot */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* install the MPIAIJ function table */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific methods and conversions; these are looked up by string name elsewhere in the library */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.
6966 6967 Collective 6968 6969 Input Parameters: 6970 + comm - MPI communicator 6971 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6972 . n - This value should be the same as the local size used in creating the 6973 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6974 calculated if `N` is given) For square matrices `n` is almost always `m`. 6975 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6976 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6977 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6978 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6979 . a - matrix values 6980 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6981 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6982 - oa - matrix values 6983 6984 Output Parameter: 6985 . mat - the matrix 6986 6987 Level: advanced 6988 6989 Notes: 6990 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6991 must free the arrays once the matrix has been destroyed and not before. 6992 6993 The `i` and `j` indices are 0 based 6994 6995 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6996 6997 This sets local rows and cannot be used to set off-processor values. 6998 6999 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 7000 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 7001 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because
  the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
  keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
  communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* m must be the actual local row count; the row layout cannot be deduced here */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* The user-provided CSR arrays are adopted directly (not copied), so the matrix is preallocated by construction */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Everything is local by construction, so assembly needs no off-process communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Per-product context shared by the backend (device-capable) MatProduct implementations below */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e.
AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destroy callback for the MatMatMPIAIJBACKEND context stored in C->product->data.
   Releases every buffer, intermediate matrix and star forest acquired during the symbolic phase. */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w are released through the SF with the recorded memtype, since they may live in device memory */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[]/off[] are pointer arrays; the index storage itself hangs off entry 0 -- see the symbolic phase */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (possibly device-side) implementation when one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      /* gather: v[j] = vv[idx[j]] */
      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MatProduct: refresh the temporary matrices when required, run the
   numeric op of every intermediate product, then gather their values and insert them into C via
   MatSetValuesCOO() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* a set flag only skips the refresh once (first numeric call after symbolic) */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* Collect values of each non-temporary product: on-process values into coo_v, off-process into coo_w */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff;

PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7144 if (mmdata->mptmp[i]) continue; 7145 if (noff) { 7146 PetscInt nown; 7147 7148 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7149 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7150 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7151 n_o += noff; 7152 n_d += nown; 7153 } else { 7154 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7155 7156 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7157 n_d += mm->nz; 7158 } 7159 } 7160 if (mmdata->hasoffproc) { /* offprocess insertion */ 7161 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7162 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7163 } 7164 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7165 PetscFunctionReturn(PETSC_SUCCESS); 7166 } 7167 7168 /* Support for Pt * A, A * P, or Pt * A * P */ 7169 #define MAX_NUMBER_INTERMEDIATE 4 7170 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7171 { 7172 Mat_Product *product = C->product; 7173 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7174 Mat_MPIAIJ *a, *p; 7175 MatMatMPIAIJBACKEND *mmdata; 7176 ISLocalToGlobalMapping P_oth_l2g = NULL; 7177 IS glob = NULL; 7178 const char *prefix; 7179 char pprefix[256]; 7180 const PetscInt *globidx, *P_oth_idx; 7181 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7182 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7183 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7184 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7185 /* a base offset; type-2: sparse with a local to global map table */ 7186 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7187 7188 MatProductType ptype; 7189 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7190 PetscMPIInt size; 7191 7192 PetscFunctionBegin; 7193 MatCheckProduct(C, 1); 7194 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7195 ptype = product->type; 7196 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7197 ptype = MATPRODUCT_AB; 7198 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7199 } 7200 switch (ptype) { 7201 case MATPRODUCT_AB: 7202 A = product->A; 7203 P = product->B; 7204 m = A->rmap->n; 7205 n = P->cmap->n; 7206 M = A->rmap->N; 7207 N = P->cmap->N; 7208 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7209 break; 7210 case MATPRODUCT_AtB: 7211 P = product->A; 7212 A = product->B; 7213 m = P->cmap->n; 7214 n = A->cmap->n; 7215 M = P->cmap->N; 7216 N = A->cmap->N; 7217 hasoffproc = PETSC_TRUE; 7218 break; 7219 case MATPRODUCT_PtAP: 7220 A = product->A; 7221 P = product->B; 7222 m = P->cmap->n; 7223 n = P->cmap->n; 7224 M = P->cmap->N; 7225 N = P->cmap->N; 7226 hasoffproc = PETSC_TRUE; 7227 break; 7228 default: 7229 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7230 } 7231 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7232 if (size == 1) hasoffproc = PETSC_FALSE; 7233 7234 /* defaults */ 7235 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7236 mp[i] = NULL; 7237 mptmp[i] = PETSC_FALSE; 7238 rmapt[i] = -1; 7239 cmapt[i] = -1; 7240 rmapa[i] = NULL; 7241 cmapa[i] = NULL; 7242 } 7243 7244 /* customization */ 
7245 PetscCall(PetscNew(&mmdata)); 7246 mmdata->reusesym = product->api_user; 7247 if (ptype == MATPRODUCT_AB) { 7248 if (product->api_user) { 7249 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7250 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7251 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7252 PetscOptionsEnd(); 7253 } else { 7254 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7255 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7256 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7257 PetscOptionsEnd(); 7258 } 7259 } else if (ptype == MATPRODUCT_PtAP) { 7260 if (product->api_user) { 7261 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7262 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7263 PetscOptionsEnd(); 7264 } else { 7265 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7266 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7267 PetscOptionsEnd(); 7268 } 7269 } 7270 a = (Mat_MPIAIJ *)A->data; 7271 p = (Mat_MPIAIJ *)P->data; 7272 PetscCall(MatSetSizes(C, m, n, M, N)); 7273 PetscCall(PetscLayoutSetUp(C->rmap)); 7274 PetscCall(PetscLayoutSetUp(C->cmap)); 7275 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7276 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7277 7278 cp = 0; 7279 switch (ptype) { 7280 case MATPRODUCT_AB: /* A * P */ 7281 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7282 7283 /* A_diag * P_local (merged or not) */ 7284 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7285 /* P is product->B */ 7286 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7287 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7288 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7289 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7290 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7291 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7292 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7293 mp[cp]->product->api_user = product->api_user; 7294 PetscCall(MatProductSetFromOptions(mp[cp])); 7295 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7296 PetscCall(ISGetIndices(glob, &globidx)); 7297 rmapt[cp] = 1; 7298 cmapt[cp] = 2; 7299 cmapa[cp] = globidx; 7300 mptmp[cp] = PETSC_FALSE; 7301 cp++; 7302 } else { /* A_diag * P_diag and A_diag * P_off */ 7303 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7304 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7305 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7306 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7307 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7308 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7309 mp[cp]->product->api_user = product->api_user; 7310 PetscCall(MatProductSetFromOptions(mp[cp])); 7311 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7312 rmapt[cp] = 1; 7313 cmapt[cp] = 1; 7314 mptmp[cp] = PETSC_FALSE; 7315 cp++; 7316 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7317 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7318 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7319 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7320 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7321 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7322 mp[cp]->product->api_user = product->api_user; 7323 PetscCall(MatProductSetFromOptions(mp[cp])); 7324 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7325 rmapt[cp] = 1; 7326 cmapt[cp] = 2; 7327 cmapa[cp] = p->garray; 7328 mptmp[cp] = PETSC_FALSE; 7329 cp++; 7330 } 7331 7332 /* A_off * P_other */ 7333 if (mmdata->P_oth) { 7334 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7335 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7336 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7337 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7338 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7339 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7340 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7341 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7342 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7343 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7344 mp[cp]->product->api_user = product->api_user; 7345 PetscCall(MatProductSetFromOptions(mp[cp])); 7346 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7347 rmapt[cp] = 1; 7348 cmapt[cp] = 2; 7349 cmapa[cp] = P_oth_idx; 7350 mptmp[cp] = PETSC_FALSE; 7351 cp++; 7352 } 7353 break; 7354 7355 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7356 /* A is product->B */ 7357 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7358 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7359 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, 
NULL, &mp[cp])); 7360 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7361 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7362 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7363 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7364 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7365 mp[cp]->product->api_user = product->api_user; 7366 PetscCall(MatProductSetFromOptions(mp[cp])); 7367 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7368 PetscCall(ISGetIndices(glob, &globidx)); 7369 rmapt[cp] = 2; 7370 rmapa[cp] = globidx; 7371 cmapt[cp] = 2; 7372 cmapa[cp] = globidx; 7373 mptmp[cp] = PETSC_FALSE; 7374 cp++; 7375 } else { 7376 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7377 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7378 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7379 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7380 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7381 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7382 mp[cp]->product->api_user = product->api_user; 7383 PetscCall(MatProductSetFromOptions(mp[cp])); 7384 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7385 PetscCall(ISGetIndices(glob, &globidx)); 7386 rmapt[cp] = 1; 7387 cmapt[cp] = 2; 7388 cmapa[cp] = globidx; 7389 mptmp[cp] = PETSC_FALSE; 7390 cp++; 7391 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7392 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7393 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7394 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7395 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7396 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7397 mp[cp]->product->api_user = product->api_user; 7398 PetscCall(MatProductSetFromOptions(mp[cp])); 7399 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7400 rmapt[cp] = 2; 7401 rmapa[cp] = p->garray; 7402 cmapt[cp] = 
2; 7403 cmapa[cp] = globidx; 7404 mptmp[cp] = PETSC_FALSE; 7405 cp++; 7406 } 7407 break; 7408 case MATPRODUCT_PtAP: 7409 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7410 /* P is product->B */ 7411 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7412 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7413 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7414 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7415 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7416 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7417 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7418 mp[cp]->product->api_user = product->api_user; 7419 PetscCall(MatProductSetFromOptions(mp[cp])); 7420 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7421 PetscCall(ISGetIndices(glob, &globidx)); 7422 rmapt[cp] = 2; 7423 rmapa[cp] = globidx; 7424 cmapt[cp] = 2; 7425 cmapa[cp] = globidx; 7426 mptmp[cp] = PETSC_FALSE; 7427 cp++; 7428 if (mmdata->P_oth) { 7429 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7430 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7431 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7432 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7433 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7434 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7435 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7436 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7437 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7438 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7439 mp[cp]->product->api_user = product->api_user; 7440 PetscCall(MatProductSetFromOptions(mp[cp])); 7441 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7442 mptmp[cp] = PETSC_TRUE; 
7443 cp++; 7444 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7445 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7446 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7447 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7448 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7449 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7450 mp[cp]->product->api_user = product->api_user; 7451 PetscCall(MatProductSetFromOptions(mp[cp])); 7452 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7453 rmapt[cp] = 2; 7454 rmapa[cp] = globidx; 7455 cmapt[cp] = 2; 7456 cmapa[cp] = P_oth_idx; 7457 mptmp[cp] = PETSC_FALSE; 7458 cp++; 7459 } 7460 break; 7461 default: 7462 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7463 } 7464 /* sanity check */ 7465 if (size > 1) 7466 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7467 7468 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7469 for (i = 0; i < cp; i++) { 7470 mmdata->mp[i] = mp[i]; 7471 mmdata->mptmp[i] = mptmp[i]; 7472 } 7473 mmdata->cp = cp; 7474 C->product->data = mmdata; 7475 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7476 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7477 7478 /* memory type */ 7479 mmdata->mtype = PETSC_MEMTYPE_HOST; 7480 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7481 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7482 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7483 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7484 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7485 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7486 7487 /* prepare coo 
coordinates for values insertion */ 7488 7489 /* count total nonzeros of those intermediate seqaij Mats 7490 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7491 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7492 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7493 */ 7494 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7495 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7496 if (mptmp[cp]) continue; 7497 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7498 const PetscInt *rmap = rmapa[cp]; 7499 const PetscInt mr = mp[cp]->rmap->n; 7500 const PetscInt rs = C->rmap->rstart; 7501 const PetscInt re = C->rmap->rend; 7502 const PetscInt *ii = mm->i; 7503 for (i = 0; i < mr; i++) { 7504 const PetscInt gr = rmap[i]; 7505 const PetscInt nz = ii[i + 1] - ii[i]; 7506 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7507 else ncoo_oown += nz; /* this row is local */ 7508 } 7509 } else ncoo_d += mm->nz; 7510 } 7511 7512 /* 7513 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7514 7515 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7516 7517 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7518 7519 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7520 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7521 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7522 7523 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7524 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7525 */ 7526 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7527 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7528 7529 /* gather (i,j) of nonzeros inserted by remote procs */ 7530 if (hasoffproc) { 7531 PetscSF msf; 7532 PetscInt ncoo2, *coo_i2, *coo_j2; 7533 7534 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7535 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7536 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7537 7538 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7539 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7540 PetscInt *idxoff = mmdata->off[cp]; 7541 PetscInt *idxown = mmdata->own[cp]; 7542 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7543 const PetscInt *rmap = rmapa[cp]; 7544 const PetscInt *cmap = cmapa[cp]; 7545 const PetscInt *ii = mm->i; 7546 PetscInt *coi = coo_i + ncoo_o; 7547 PetscInt *coj = coo_j + ncoo_o; 7548 const PetscInt mr = mp[cp]->rmap->n; 7549 const PetscInt rs = C->rmap->rstart; 7550 const PetscInt re = C->rmap->rend; 7551 const PetscInt cs = C->cmap->rstart; 7552 for (i = 0; i < mr; i++) { 7553 const PetscInt *jj = mm->j + ii[i]; 7554 const PetscInt gr = rmap[i]; 7555 const PetscInt nz = ii[i + 1] - ii[i]; 7556 if (gr < rs || gr >= re) { /* this is an offproc row */ 7557 for (j = ii[i]; j < ii[i + 1]; j++) { 7558 *coi++ = gr; 7559 *idxoff++ = j; 7560 } 7561 if (!cmapt[cp]) { /* already global */ 7562 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7563 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7564 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7565 } else { /* offdiag */ 7566 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7567 } 7568 ncoo_o += nz; 7569 } else { /* this is a local row */ 7570 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7571 } 7572 } 7573 } 7574 mmdata->off[cp + 1] = idxoff; 7575 mmdata->own[cp + 1] = idxown; 7576 } 7577 7578 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7579 PetscInt incoo_o; 7580 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7581 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7582 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7583 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7584 ncoo = ncoo_d + ncoo_oown + ncoo2; 7585 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7586 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7587 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7588 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7589 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7590 PetscCall(PetscFree2(coo_i, coo_j)); 7591 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7592 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7593 coo_i = coo_i2; 7594 coo_j = coo_j2; 7595 } else { /* no offproc values insertion */ 7596 ncoo = ncoo_d; 7597 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7598 7599 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7600 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7601 PetscCall(PetscSFSetUp(mmdata->sf)); 7602 } 7603 mmdata->hasoffproc = hasoffproc; 7604 7605 /* gather (i,j) of nonzeros inserted locally */ 7606 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7607 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7608 PetscInt *coi = coo_i + ncoo_d; 7609 PetscInt *coj = coo_j + ncoo_d; 7610 const PetscInt *jj = 
mm->j; 7611 const PetscInt *ii = mm->i; 7612 const PetscInt *cmap = cmapa[cp]; 7613 const PetscInt *rmap = rmapa[cp]; 7614 const PetscInt mr = mp[cp]->rmap->n; 7615 const PetscInt rs = C->rmap->rstart; 7616 const PetscInt re = C->rmap->rend; 7617 const PetscInt cs = C->cmap->rstart; 7618 7619 if (mptmp[cp]) continue; 7620 if (rmapt[cp] == 1) { /* consecutive rows */ 7621 /* fill coo_i */ 7622 for (i = 0; i < mr; i++) { 7623 const PetscInt gr = i + rs; 7624 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7625 } 7626 /* fill coo_j */ 7627 if (!cmapt[cp]) { /* type-0, already global */ 7628 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7629 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7630 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7631 } else { /* type-2, local to global for sparse columns */ 7632 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7633 } 7634 ncoo_d += mm->nz; 7635 } else if (rmapt[cp] == 2) { /* sparse rows */ 7636 for (i = 0; i < mr; i++) { 7637 const PetscInt *jj = mm->j + ii[i]; 7638 const PetscInt gr = rmap[i]; 7639 const PetscInt nz = ii[i + 1] - ii[i]; 7640 if (gr >= rs && gr < re) { /* local rows */ 7641 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7642 if (!cmapt[cp]) { /* type-0, already global */ 7643 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7644 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7645 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7646 } else { /* type-2, local to global for sparse columns */ 7647 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7648 } 7649 ncoo_d += nz; 7650 } 7651 } 7652 } 7653 } 7654 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7655 PetscCall(ISDestroy(&glob)); 7656 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7657 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7658 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to 
this proc */ 7659 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7660 7661 /* preallocate with COO data */ 7662 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7663 PetscCall(PetscFree2(coo_i, coo_j)); 7664 PetscFunctionReturn(PETSC_SUCCESS); 7665 } 7666 7667 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7668 { 7669 Mat_Product *product = mat->product; 7670 #if defined(PETSC_HAVE_DEVICE) 7671 PetscBool match = PETSC_FALSE; 7672 PetscBool usecpu = PETSC_FALSE; 7673 #else 7674 PetscBool match = PETSC_TRUE; 7675 #endif 7676 7677 PetscFunctionBegin; 7678 MatCheckProduct(mat, 1); 7679 #if defined(PETSC_HAVE_DEVICE) 7680 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7681 if (match) { /* we can always fallback to the CPU if requested */ 7682 switch (product->type) { 7683 case MATPRODUCT_AB: 7684 if (product->api_user) { 7685 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7686 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7687 PetscOptionsEnd(); 7688 } else { 7689 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7690 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7691 PetscOptionsEnd(); 7692 } 7693 break; 7694 case MATPRODUCT_AtB: 7695 if (product->api_user) { 7696 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7697 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7698 PetscOptionsEnd(); 7699 } else { 7700 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, 
"MatProduct_AtB", "Mat"); 7701 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7702 PetscOptionsEnd(); 7703 } 7704 break; 7705 case MATPRODUCT_PtAP: 7706 if (product->api_user) { 7707 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7708 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7709 PetscOptionsEnd(); 7710 } else { 7711 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7712 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7713 PetscOptionsEnd(); 7714 } 7715 break; 7716 default: 7717 break; 7718 } 7719 match = (PetscBool)!usecpu; 7720 } 7721 #endif 7722 if (match) { 7723 switch (product->type) { 7724 case MATPRODUCT_AB: 7725 case MATPRODUCT_AtB: 7726 case MATPRODUCT_PtAP: 7727 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7728 break; 7729 default: 7730 break; 7731 } 7732 } 7733 /* fallback to MPIAIJ ops */ 7734 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7735 PetscFunctionReturn(PETSC_SUCCESS); 7736 } 7737 7738 /* 7739 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7740 7741 n - the number of block indices in cc[] 7742 cc - the block indices (must be large enough to contain the indices) 7743 */ 7744 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7745 { 7746 PetscInt cnt = -1, nidx, j; 7747 const PetscInt *idx; 7748 7749 PetscFunctionBegin; 7750 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7751 if (nidx) { 7752 cnt = 0; 7753 cc[cnt] = idx[0] / bs; 7754 for (j = 1; j < nidx; j++) { 7755 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7756 } 7757 } 7758 
PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7759 *n = cnt + 1; 7760 PetscFunctionReturn(PETSC_SUCCESS); 7761 } 7762 7763 /* 7764 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7765 7766 ncollapsed - the number of block indices 7767 collapsed - the block indices (must be large enough to contain the indices) 7768 */ 7769 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7770 { 7771 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7772 7773 PetscFunctionBegin; 7774 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7775 for (i = start + 1; i < start + bs; i++) { 7776 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7777 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7778 cprevtmp = cprev; 7779 cprev = merged; 7780 merged = cprevtmp; 7781 } 7782 *ncollapsed = nprev; 7783 if (collapsed) *collapsed = cprev; 7784 PetscFunctionReturn(PETSC_SUCCESS); 7785 } 7786 7787 /* 7788 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7789 7790 Input Parameter: 7791 . Amat - matrix 7792 - symmetrize - make the result symmetric 7793 + scale - scale with diagonal 7794 7795 Output Parameter: 7796 . 
a_Gmat - output scalar graph >= 0 7797 7798 */ 7799 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7800 { 7801 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7802 MPI_Comm comm; 7803 Mat Gmat; 7804 PetscBool ismpiaij, isseqaij; 7805 Mat a, b, c; 7806 MatType jtype; 7807 7808 PetscFunctionBegin; 7809 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7810 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7811 PetscCall(MatGetSize(Amat, &MM, &NN)); 7812 PetscCall(MatGetBlockSize(Amat, &bs)); 7813 nloc = (Iend - Istart) / bs; 7814 7815 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7816 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7817 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7818 7819 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7820 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7821 implementation */ 7822 if (bs > 1) { 7823 PetscCall(MatGetType(Amat, &jtype)); 7824 PetscCall(MatCreate(comm, &Gmat)); 7825 PetscCall(MatSetType(Gmat, jtype)); 7826 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7827 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7828 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7829 PetscInt *d_nnz, *o_nnz; 7830 MatScalar *aa, val, *AA; 7831 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7832 7833 if (isseqaij) { 7834 a = Amat; 7835 b = NULL; 7836 } else { 7837 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7838 a = d->A; 7839 b = d->B; 7840 } 7841 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7842 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 
0 : nloc), &o_nnz)); 7843 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7844 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7845 const PetscInt *cols1, *cols2; 7846 7847 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7848 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7849 nnz[brow / bs] = nc2 / bs; 7850 if (nc2 % bs) ok = 0; 7851 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7852 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7853 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7854 if (nc1 != nc2) ok = 0; 7855 else { 7856 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7857 if (cols1[jj] != cols2[jj]) ok = 0; 7858 if (cols1[jj] % bs != jj % bs) ok = 0; 7859 } 7860 } 7861 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7862 } 7863 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7864 if (!ok) { 7865 PetscCall(PetscFree2(d_nnz, o_nnz)); 7866 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7867 goto old_bs; 7868 } 7869 } 7870 } 7871 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7872 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7873 PetscCall(PetscFree2(d_nnz, o_nnz)); 7874 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7875 // diag 7876 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7877 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7878 7879 ai = aseq->i; 7880 n = ai[brow + 1] - ai[brow]; 7881 aj = aseq->j + ai[brow]; 7882 for (PetscInt k = 0; k < n; k += bs) { // block columns 7883 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7884 val = 0; 7885 if (index_size == 0) { 7886 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7887 aa = aseq->a + ai[brow + ii] + k; 7888 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7889 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7890 } 7891 } 7892 } else { // use (index,index) value if provided 
7893 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7894 PetscInt ii = index[iii]; 7895 aa = aseq->a + ai[brow + ii] + k; 7896 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7897 PetscInt jj = index[jjj]; 7898 val += PetscAbs(PetscRealPart(aa[jj])); 7899 } 7900 } 7901 } 7902 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7903 AA[k / bs] = val; 7904 } 7905 grow = Istart / bs + brow / bs; 7906 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7907 } 7908 // off-diag 7909 if (ismpiaij) { 7910 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7911 const PetscScalar *vals; 7912 const PetscInt *cols, *garray = aij->garray; 7913 7914 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7915 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7916 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7917 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7918 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7919 AA[k / bs] = 0; 7920 AJ[cidx] = garray[cols[k]] / bs; 7921 } 7922 nc = ncols / bs; 7923 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7924 if (index_size == 0) { 7925 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7926 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7927 for (PetscInt k = 0; k < ncols; k += bs) { 7928 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7929 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7930 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7931 } 7932 } 7933 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7934 } 7935 } else { // use (index,index) value if provided 7936 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7937 PetscInt ii = index[iii]; 7938 PetscCall(MatGetRow(b, brow + ii, &ncols, 
&cols, &vals)); 7939 for (PetscInt k = 0; k < ncols; k += bs) { 7940 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7941 PetscInt jj = index[jjj]; 7942 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7943 } 7944 } 7945 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7946 } 7947 } 7948 grow = Istart / bs + brow / bs; 7949 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7950 } 7951 } 7952 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7953 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7954 PetscCall(PetscFree2(AA, AJ)); 7955 } else { 7956 const PetscScalar *vals; 7957 const PetscInt *idx; 7958 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7959 old_bs: 7960 /* 7961 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7962 */ 7963 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7964 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7965 if (isseqaij) { 7966 PetscInt max_d_nnz; 7967 7968 /* 7969 Determine exact preallocation count for (sequential) scalar matrix 7970 */ 7971 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7972 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7973 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7974 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7975 PetscCall(PetscFree3(w0, w1, w2)); 7976 } else if (ismpiaij) { 7977 Mat Daij, Oaij; 7978 const PetscInt *garray; 7979 PetscInt max_d_nnz; 7980 7981 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7982 /* 7983 Determine exact preallocation count for diagonal block portion of scalar matrix 7984 */ 7985 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7986 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7987 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7988 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) 
PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7989 PetscCall(PetscFree3(w0, w1, w2)); 7990 /* 7991 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7992 */ 7993 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7994 o_nnz[jj] = 0; 7995 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7996 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7997 o_nnz[jj] += ncols; 7998 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7999 } 8000 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 8001 } 8002 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 8003 /* get scalar copy (norms) of matrix */ 8004 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 8005 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 8006 PetscCall(PetscFree2(d_nnz, o_nnz)); 8007 for (Ii = Istart; Ii < Iend; Ii++) { 8008 PetscInt dest_row = Ii / bs; 8009 8010 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 8011 for (jj = 0; jj < ncols; jj++) { 8012 PetscInt dest_col = idx[jj] / bs; 8013 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 8014 8015 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 8016 } 8017 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 8018 } 8019 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8020 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8021 } 8022 } else { 8023 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8024 else { 8025 Gmat = Amat; 8026 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8027 } 8028 if (isseqaij) { 8029 a = Gmat; 8030 b = NULL; 8031 } else { 8032 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8033 a = d->A; 8034 b = d->B; 8035 } 8036 if (filter >= 0 || scale) { 8037 /* take absolute value of each entry */ 8038 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8039 MatInfo info; 8040 
PetscScalar *avals; 8041 8042 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8043 PetscCall(MatSeqAIJGetArray(c, &avals)); 8044 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8045 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8046 } 8047 } 8048 } 8049 if (symmetrize) { 8050 PetscBool isset, issym; 8051 8052 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8053 if (!isset || !issym) { 8054 Mat matTrans; 8055 8056 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8057 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8058 PetscCall(MatDestroy(&matTrans)); 8059 } 8060 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8061 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8062 if (scale) { 8063 /* scale c for all diagonal values = 1 or -1 */ 8064 Vec diag; 8065 8066 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8067 PetscCall(MatGetDiagonal(Gmat, diag)); 8068 PetscCall(VecReciprocal(diag)); 8069 PetscCall(VecSqrtAbs(diag)); 8070 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8071 PetscCall(VecDestroy(&diag)); 8072 } 8073 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8074 if (filter >= 0) { 8075 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8076 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8077 } 8078 *a_Gmat = Gmat; 8079 PetscFunctionReturn(PETSC_SUCCESS); 8080 } 8081 8082 /* 8083 Special version for direct calls from Fortran 8084 */ 8085 8086 /* Change these macros so can be used in void function */ 8087 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8088 #undef PetscCall 8089 #define PetscCall(...) 
\
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

/* Like the redefined PetscCall() above: report the error through *_ierr (the Fortran error
   argument) and return, since the Fortran stub below has a void return type */
#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the name-mangling convention of the Fortran compiler in use */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - special MatSetValues() stub for direct calls from Fortran on MPIAIJ
  matrices. All arguments are passed by reference (Fortran convention); errors are reported
  through the final _ierr argument instead of a return value.

  The body mirrors the MPIAIJ MatSetValues() fast path: locally owned rows are inserted
  directly into the diagonal (A) and off-diagonal (B) sequential blocks via the
  MatSetValues_SeqAIJ_A_Private()/MatSetValues_SeqAIJ_B_Private() macros; rows owned by
  other ranks are stashed for communication at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    /* Ownership ranges of this rank: rows [rstart,rend), "diagonal" columns [cstart,cend) */
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Per-row binary-search state consumed by MatSetValues_SeqAIJ_{A,B}_Private():
       "1" variables track the diagonal block A, "2" variables the off-diagonal block B */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Row is locally owned: set up search state for both the A and B blocks of this row */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* Never drop an explicit zero on the diagonal: it may be needed by solvers */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column in the locally owned range goes into the diagonal block A */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Column belongs to the off-diagonal block B */
            if (mat->was_assembled) {
              /* After assembly, B uses compacted local column indices; translate the global
                 column through the colmap (hash table or dense array, per configuration) */
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                /* New off-diagonal column: disassemble back to global indices so it can be inserted */
                PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Row owned by another rank: stash the values for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ