#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

static PetscErrorCode MatReset_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  /* Save the nonzero states of the component matrices because those are what are used to determine
     the nonzero state of mat */
  PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;

  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));
  PetscCall(MatSetUp_MPI_Hash(mat));
  aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
"MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 139 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 140 for communicators controlling multiple processes. 

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also
   automatically switches over to use inodes when enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/
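
/*
   A minimal usage sketch for `MATAIJ`, illustrating the recommendation above to call both preallocation
   routines (the sizes and nonzero counts here are made-up assumptions, not part of this file):

     Mat         A;
     PetscInt    i = 0, j = 0;
     PetscScalar v = 1.0;

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
     PetscCall(MatSetType(A, MATAIJ));                          // or -mat_type aij with MatSetFromOptions()
     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          // used when the communicator has one process
     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); // used when it has more than one
     PetscCall(MatSetValues(A, 1, &i, 1, &j, &v, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatDestroy(&A));

   Whichever preallocation call does not match the communicator size is ignored, so calling both is safe.
*/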

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
  PetscInt i, m, n, *garray = aij->garray;
  Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal *work;
  const PetscScalar *dummy;
  PetscMPIInt in;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  PetscCall(PetscMPIIntCast(n, &in));
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  IS sis, gis;
  const PetscInt *isis, *igis;
  PetscInt n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash table cost; without it, it is not scalable
  (each process has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
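
/*
   Worked illustration of the mapping built above (hypothetical data, not part of the library API): if the
   off-diagonal block has three columns whose global indices are garray[] = {3, 7, 12}, then with
   PETSC_USE_CTABLE the hash map stores key global+1 -> value local+1 (e.g. 8 -> 2), so a lookup miss returns 0
   and a hit must be decremented by one; without PETSC_USE_CTABLE, colmap[] is a dense array of length cmap->N
   with colmap[7] = 2 and zeros elsewhere. The +1 shift is what lets 0 mean "global column not present on this
   process" in both variants.
*/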

#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool ignorezeroentries = a->ignorezeroentries;
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar *aa, *ba;
  PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt nonew;
  MatScalar *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      lastcol1 = -1;
      rp1 = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1 = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1 = aimax[row];
      nrow1 = ailen[row];
      low1 = 0;
      high1 = nrow1;
      lastcol2 = -1;
      rp2 = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2 = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2 = bimax[row];
      nrow2 = bilen[row];
      low2 = 0;
      high2 = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi = b->i;
              bilen = b->ilen;
              bj = b->j;
              ba = b->a;
              rp2 = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2 = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2 = 0;
              high2 = nrow2;
              bm = aij->B->rmap->n;
              ba = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt diag_so_far = 0, dnz;
  PetscInt offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
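
/*
   Worked example for the two CSR-copy routines above (hypothetical data, for orientation only): suppose this
   process owns columns [cstart, cend) = [4, 8) and has one local row described by mat_i = {0, 3} and
   mat_j = {2, 5, 9}. Column 5 lands in the diagonal block A as local column 5 - 4 = 1, while columns 2 and 9
   land in the off-diagonal block B and keep their global indices at this stage, giving ailen[0] = 1 and
   bilen[0] = 2; the off-diagonal indices are compacted to local numbering later during assembly.
*/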

static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt n;
  PetscInt i, j, rstart, ncols, flg;
  PetscInt *row, *col;
  PetscBool other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt *lrows;
  PetscInt r, len;
  PetscBool cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt nnwA, nnwB;
    PetscBool nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
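
/*
   A minimal usage sketch (illustrative, with made-up row indices): MatZeroRows() is the public entry point that
   dispatches to the routine above for MPIAIJ. Zeroing rows while keeping a unit diagonal and fixing the
   right-hand side, e.g. for Dirichlet boundary conditions, looks like

     PetscInt rows[] = {0, 5};
     PetscCall(MatZeroRows(A, 2, rows, 1.0, x, b));   // x holds the prescribed values; b is set to diag*x in those rows

   Passing NULL for x and b skips the right-hand-side fix, and diag = 0.0 simply zeroes the rows.
*/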

static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
  PetscInt n = A->rmap->n;
  PetscInt i, j, r, m, len = 0;
  PetscInt *lrows, *owners = A->rmap->range;
  PetscMPIInt p = 0;
  PetscSFNode *rrows;
  PetscSF sf;
  const PetscScalar *xx;
  PetscScalar *bb, *mask, *aij_a;
  Vec xmask, lmask;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt *aj, *ii, *ridx;
  PetscScalar *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m = aij->compressedrow.nrows;
    ii = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}
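
/*
   For reference (a restatement of the routine above, not additional functionality): with the input split into
   locally owned entries x_d and ghost entries x_o gathered into lvec, the product is y = A_d*x_d + B_o*x_o,
   where A_d is the diagonal block and B_o the off-diagonal block. The scatter that fills lvec is started before
   the local multiply and completed only when the off-diagonal contribution needs it, overlapping communication
   with computation.
*/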

static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS Me, Notme;
  PetscInt M, N, first, last, *notme, i;
  PetscBool lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64 nz, hnz;
  PetscInt *rowlens;
  PetscInt *colidxs;
  PetscScalar *matvals;
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M = mat->rmap->N;
  N = mat->cmap->N;
  m = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
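
/*
   Summary of what the writer above produces (a description of the code, not a new format): a 4-entry header
   {MAT_FILE_CLASSID, M, N, total nonzeros}, then the row lengths of all processes in global row order, then all
   global column indices, then all values. Within each row the entries are emitted as off-diagonal columns below
   the diagonal block, then diagonal-block columns, then the remaining off-diagonal columns, so the column
   indices of a row appear in ascending global order.
*/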

#include <petscdraw.h>
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt rank = aij->rank, size = aij->size;
  PetscBool isdraw, iascii, isbinary;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
mat->cmap->N : 0, 0, 1, &iscol)); 1355 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1356 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1357 /* The commented code uses MatCreateSubMatrices instead */ 1358 /* 1359 Mat *AA, A = NULL, Av; 1360 IS isrow,iscol; 1361 1362 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1363 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1364 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1365 if (rank == 0) { 1366 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1367 A = AA[0]; 1368 Av = AA[0]; 1369 } 1370 PetscCall(MatDestroySubMatrices(1,&AA)); 1371 */ 1372 PetscCall(ISDestroy(&iscol)); 1373 PetscCall(ISDestroy(&isrow)); 1374 /* 1375 Everyone has to call to draw the matrix since the graphics waits are 1376 synchronized across all processors that share the PetscDraw object 1377 */ 1378 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1379 if (rank == 0) { 1380 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1381 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1382 } 1383 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1384 PetscCall(MatDestroy(&A)); 1385 } 1386 PetscFunctionReturn(PETSC_SUCCESS); 1387 } 1388 1389 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1390 { 1391 PetscBool iascii, isdraw, issocket, isbinary; 1392 1393 PetscFunctionBegin; 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1395 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1396 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1397 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1398 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1399 PetscFunctionReturn(PETSC_SUCCESS); 1400 } 1401 1402 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1403 { 1404 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1405 Vec bb1 = NULL; 1406 PetscBool hasop; 1407 1408 PetscFunctionBegin; 1409 if (flag == SOR_APPLY_UPPER) { 1410 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1411 PetscFunctionReturn(PETSC_SUCCESS); 1412 } 1413 1414 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1415 1416 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1417 if (flag & SOR_ZERO_INITIAL_GUESS) { 1418 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1419 its--; 1420 } 1421 1422 while (its--) { 1423 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1424 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1425 1426 /* update rhs: bb1 = bb - B*x */ 1427 PetscCall(VecScale(mat->lvec, -1.0)); 1428 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1429 1430 /* local sweep */ 1431 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1432 } 1433 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1434 if (flag & SOR_ZERO_INITIAL_GUESS) { 1435 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1436 its--; 1437 } 1438 while (its--) { 1439 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1440 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1441 1442 /* update rhs: bb1 = bb - B*x */ 1443 PetscCall(VecScale(mat->lvec, -1.0)); 1444 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1445 1446 /* local sweep */ 1447 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1448 } 1449 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1450 if (flag & SOR_ZERO_INITIAL_GUESS) { 1451 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1452 its--; 1453 } 1454 while (its--) { 1455 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1456 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1457 1458 /* update rhs: bb1 = bb - B*x */ 1459 PetscCall(VecScale(mat->lvec, -1.0)); 1460 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1461 1462 /* local sweep */ 1463 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1464 } 1465 } else if (flag & SOR_EISENSTAT) { 1466 Vec xx1; 1467 1468 PetscCall(VecDuplicate(bb, &xx1)); 1469 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1470 1471 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1472 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1473 if (!mat->diag) { 1474 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1475 PetscCall(MatGetDiagonal(matin, mat->diag)); 1476 } 1477 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1478 if (hasop) { 1479 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1480 } else { 1481 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1482 } 1483 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1484 1485 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1486 1487 /* local sweep */ 1488 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1489 PetscCall(VecAXPY(xx, 1.0, xx1)); 1490 PetscCall(VecDestroy(&xx1)); 1491 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1492 1493 PetscCall(VecDestroy(&bb1)); 1494 1495 matin->factorerrortype = mat->A->factorerrortype; 1496 PetscFunctionReturn(PETSC_SUCCESS); 1497 } 1498 1499 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1500 { 1501 Mat aA, aB, Aperm; 1502 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1503 PetscScalar *aa, *ba; 1504 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1505 PetscSF rowsf, sf; 1506 IS parcolp = NULL; 1507 PetscBool done; 1508 1509 PetscFunctionBegin; 1510 PetscCall(MatGetLocalSize(A, &m, &n)); 1511 PetscCall(ISGetIndices(rowp, &rwant)); 1512 PetscCall(ISGetIndices(colp, &cwant)); 1513 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1514 1515 /* Invert row permutation to find out where my rows should go */ 1516 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1517 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1518 PetscCall(PetscSFSetFromOptions(rowsf)); 1519 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1520 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1521 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1522 1523 /* Invert column permutation to find out where my columns should go */ 1524 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1525 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1526 PetscCall(PetscSFSetFromOptions(sf)); 1527 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1528 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1529 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1530 PetscCall(PetscSFDestroy(&sf)); 1531 1532 PetscCall(ISRestoreIndices(rowp, &rwant)); 1533 PetscCall(ISRestoreIndices(colp, &cwant)); 1534 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1535 1536 /* Find out where my gcols should go */ 1537 PetscCall(MatGetSize(aB, NULL, &ng)); 1538 PetscCall(PetscMalloc1(ng, &gcdest)); 1539 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1540 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1541 PetscCall(PetscSFSetFromOptions(sf)); 1542 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1543 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1544 PetscCall(PetscSFDestroy(&sf)); 1545 1546 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1547 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1548 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1549 for (i = 0; i < m; i++) { 1550 PetscInt row = rdest[i]; 1551 PetscMPIInt rowner; 1552 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1553 for (j = ai[i]; j < ai[i + 1]; j++) { 1554 PetscInt col = cdest[aj[j]]; 1555 PetscMPIInt cowner; 1556 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1557 if (rowner == cowner) dnnz[i]++; 1558 else onnz[i]++; 1559 } 1560 for (j = bi[i]; j < bi[i + 1]; j++) { 1561 PetscInt col = gcdest[bj[j]]; 1562 PetscMPIInt cowner; 1563 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1564 if (rowner == cowner) dnnz[i]++; 1565 else onnz[i]++; 1566 } 1567 } 1568 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1569 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1570 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1571 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1572 PetscCall(PetscSFDestroy(&rowsf)); 1573 1574 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1575 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1576 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1577 for (i = 0; i < m; i++) { 1578 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1579 PetscInt j0, rowlen; 1580 rowlen = ai[i + 1] - ai[i]; 1581 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1582 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1583 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1584 } 1585 rowlen = bi[i + 1] - bi[i]; 1586 for (j0 = j = 0; j < rowlen; j0 = j) { 1587 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1588 
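/* Flush this batch of permuted off-diagonal entries into Aperm: bcols holds the global column numbers obtained through gcdest[], and each batch is capped at m entries so that the repurposed dnnz/onnz scratch arrays (each of length m) are always large enough. */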
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1589 } 1590 } 1591 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1592 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1593 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1594 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1595 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1596 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1597 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1598 PetscCall(PetscFree3(work, rdest, cdest)); 1599 PetscCall(PetscFree(gcdest)); 1600 if (parcolp) PetscCall(ISDestroy(&colp)); 1601 *B = Aperm; 1602 PetscFunctionReturn(PETSC_SUCCESS); 1603 } 1604 1605 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1606 { 1607 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1608 1609 PetscFunctionBegin; 1610 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1611 if (ghosts) *ghosts = aij->garray; 1612 PetscFunctionReturn(PETSC_SUCCESS); 1613 } 1614 1615 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1616 { 1617 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1618 Mat A = mat->A, B = mat->B; 1619 PetscLogDouble isend[5], irecv[5]; 1620 1621 PetscFunctionBegin; 1622 info->block_size = 1.0; 1623 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1624 1625 isend[0] = info->nz_used; 1626 isend[1] = info->nz_allocated; 1627 isend[2] = info->nz_unneeded; 1628 isend[3] = info->memory; 1629 isend[4] = info->mallocs; 1630 1631 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1632 1633 isend[0] += info->nz_used; 1634 isend[1] += info->nz_allocated; 1635 isend[2] += info->nz_unneeded; 1636 isend[3] += info->memory; 1637 isend[4] += info->mallocs; 1638 if (flag == MAT_LOCAL) { 1639 info->nz_used = isend[0]; 1640 info->nz_allocated = isend[1]; 1641 info->nz_unneeded = isend[2]; 1642 info->memory = isend[3]; 1643 info->mallocs = isend[4]; 1644 } else if (flag == MAT_GLOBAL_MAX) { 1645 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1646 1647 info->nz_used = irecv[0]; 1648 info->nz_allocated = irecv[1]; 1649 info->nz_unneeded = irecv[2]; 1650 info->memory = irecv[3]; 1651 info->mallocs = irecv[4]; 1652 } else if (flag == MAT_GLOBAL_SUM) { 1653 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1654 1655 info->nz_used = irecv[0]; 1656 info->nz_allocated = irecv[1]; 1657 info->nz_unneeded = irecv[2]; 1658 info->memory = irecv[3]; 1659 info->mallocs = irecv[4]; 1660 } 1661 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1662 info->fill_ratio_needed = 0; 1663 info->factor_mallocs = 0; 1664 PetscFunctionReturn(PETSC_SUCCESS); 1665 } 1666 1667 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1668 { 1669 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1670 1671 PetscFunctionBegin; 1672 switch (op) { 1673 case MAT_NEW_NONZERO_LOCATIONS: 1674 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1675 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1676 case MAT_KEEP_NONZERO_PATTERN: 1677 case MAT_NEW_NONZERO_LOCATION_ERR: 1678 case MAT_USE_INODES: 1679 case MAT_IGNORE_ZERO_ENTRIES: 1680 case MAT_FORM_EXPLICIT_TRANSPOSE: 1681 MatCheckPreallocated(A, 1); 1682 PetscCall(MatSetOption(a->A, op, flg)); 1683 PetscCall(MatSetOption(a->B, op, flg)); 1684 break; 1685 case MAT_ROW_ORIENTED: 1686 MatCheckPreallocated(A, 1); 1687 a->roworiented = flg; 1688 1689 
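/* The orientation flag is forwarded to both sequential blocks below so that dense value arrays handed to MatSetValues() are interpreted the same way everywhere. Illustrative layout for a 2x3 logically dense block of values v[] (the entries a_ij are hypothetical):
   row-oriented (flg == PETSC_TRUE):     v = {a00, a01, a02, a10, a11, a12}
   column-oriented (flg == PETSC_FALSE): v = {a00, a10, a01, a11, a02, a12} */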
PetscCall(MatSetOption(a->A, op, flg)); 1690 PetscCall(MatSetOption(a->B, op, flg)); 1691 break; 1692 case MAT_FORCE_DIAGONAL_ENTRIES: 1693 case MAT_SORTED_FULL: 1694 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1695 break; 1696 case MAT_IGNORE_OFF_PROC_ENTRIES: 1697 a->donotstash = flg; 1698 break; 1699 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1700 case MAT_SPD: 1701 case MAT_SYMMETRIC: 1702 case MAT_STRUCTURALLY_SYMMETRIC: 1703 case MAT_HERMITIAN: 1704 case MAT_SYMMETRY_ETERNAL: 1705 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1706 case MAT_SPD_ETERNAL: 1707 /* if the diagonal matrix is square it inherits some of the properties above */ 1708 break; 1709 case MAT_SUBMAT_SINGLEIS: 1710 A->submat_singleis = flg; 1711 break; 1712 case MAT_STRUCTURE_ONLY: 1713 /* The option is handled directly by MatSetOption() */ 1714 break; 1715 default: 1716 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1717 } 1718 PetscFunctionReturn(PETSC_SUCCESS); 1719 } 1720 1721 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1722 { 1723 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1724 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1725 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1726 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1727 PetscInt *cmap, *idx_p; 1728 1729 PetscFunctionBegin; 1730 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1731 mat->getrowactive = PETSC_TRUE; 1732 1733 if (!mat->rowvalues && (idx || v)) { 1734 /* 1735 allocate enough space to hold information from the longest row. 1736 */ 1737 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1738 PetscInt max = 1, tmp; 1739 for (i = 0; i < matin->rmap->n; i++) { 1740 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1741 if (max < tmp) max = tmp; 1742 } 1743 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1744 } 1745 1746 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1747 lrow = row - rstart; 1748 1749 pvA = &vworkA; 1750 pcA = &cworkA; 1751 pvB = &vworkB; 1752 pcB = &cworkB; 1753 if (!v) { 1754 pvA = NULL; 1755 pvB = NULL; 1756 } 1757 if (!idx) { 1758 pcA = NULL; 1759 if (!v) pcB = NULL; 1760 } 1761 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1762 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1763 nztot = nzA + nzB; 1764 1765 cmap = mat->garray; 1766 if (v || idx) { 1767 if (nztot) { 1768 /* Sort by increasing column numbers, assuming A and B already sorted */ 1769 PetscInt imark = -1; 1770 if (v) { 1771 *v = v_p = mat->rowvalues; 1772 for (i = 0; i < nzB; i++) { 1773 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1774 else break; 1775 } 1776 imark = i; 1777 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1778 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1779 } 1780 if (idx) { 1781 *idx = idx_p = mat->rowindices; 1782 if (imark > -1) { 1783 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1784 } else { 1785 for (i = 0; i < nzB; i++) { 1786 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1787 else break; 1788 } 1789 imark = i; 1790 } 1791 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1792 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1793 } 1794 } else { 1795 if 
(idx) *idx = NULL; 1796 if (v) *v = NULL; 1797 } 1798 } 1799 *nz = nztot; 1800 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1801 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1802 PetscFunctionReturn(PETSC_SUCCESS); 1803 } 1804 1805 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1806 { 1807 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1808 1809 PetscFunctionBegin; 1810 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1811 aij->getrowactive = PETSC_FALSE; 1812 PetscFunctionReturn(PETSC_SUCCESS); 1813 } 1814 1815 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1816 { 1817 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1818 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1819 PetscInt i, j, cstart = mat->cmap->rstart; 1820 PetscReal sum = 0.0; 1821 const MatScalar *v, *amata, *bmata; 1822 PetscMPIInt iN; 1823 1824 PetscFunctionBegin; 1825 if (aij->size == 1) { 1826 PetscCall(MatNorm(aij->A, type, norm)); 1827 } else { 1828 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1829 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1830 if (type == NORM_FROBENIUS) { 1831 v = amata; 1832 for (i = 0; i < amat->nz; i++) { 1833 sum += PetscRealPart(PetscConj(*v) * (*v)); 1834 v++; 1835 } 1836 v = bmata; 1837 for (i = 0; i < bmat->nz; i++) { 1838 sum += PetscRealPart(PetscConj(*v) * (*v)); 1839 v++; 1840 } 1841 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1842 *norm = PetscSqrtReal(*norm); 1843 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1844 } else if (type == NORM_1) { /* max column norm */ 1845 PetscReal *tmp, *tmp2; 1846 PetscInt *jj, *garray = aij->garray; 1847 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1848 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1849 *norm = 0.0; 1850 v = amata; 1851 jj = amat->j; 1852 for (j = 0; j < amat->nz; j++) { 1853 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1854 v++; 1855 } 1856 v = bmata; 1857 jj = bmat->j; 1858 for (j = 0; j < bmat->nz; j++) { 1859 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1860 v++; 1861 } 1862 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 1863 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1864 for (j = 0; j < mat->cmap->N; j++) { 1865 if (tmp2[j] > *norm) *norm = tmp2[j]; 1866 } 1867 PetscCall(PetscFree(tmp)); 1868 PetscCall(PetscFree(tmp2)); 1869 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1870 } else if (type == NORM_INFINITY) { /* max row norm */ 1871 PetscReal ntemp = 0.0; 1872 for (j = 0; j < aij->A->rmap->n; j++) { 1873 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1874 sum = 0.0; 1875 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1876 sum += PetscAbsScalar(*v); 1877 v++; 1878 } 1879 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1880 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1881 sum += PetscAbsScalar(*v); 1882 v++; 1883 } 1884 if (sum > ntemp) ntemp = sum; 1885 } 1886 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1887 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1888 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1889 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1890 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, 
&bmata)); 1891 } 1892 PetscFunctionReturn(PETSC_SUCCESS); 1893 } 1894 1895 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1896 { 1897 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1898 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1899 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1900 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1901 Mat B, A_diag, *B_diag; 1902 const MatScalar *pbv, *bv; 1903 1904 PetscFunctionBegin; 1905 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1906 ma = A->rmap->n; 1907 na = A->cmap->n; 1908 mb = a->B->rmap->n; 1909 nb = a->B->cmap->n; 1910 ai = Aloc->i; 1911 aj = Aloc->j; 1912 bi = Bloc->i; 1913 bj = Bloc->j; 1914 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1915 PetscInt *d_nnz, *g_nnz, *o_nnz; 1916 PetscSFNode *oloc; 1917 PETSC_UNUSED PetscSF sf; 1918 1919 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1920 /* compute d_nnz for preallocation */ 1921 PetscCall(PetscArrayzero(d_nnz, na)); 1922 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1923 /* compute local off-diagonal contributions */ 1924 PetscCall(PetscArrayzero(g_nnz, nb)); 1925 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1926 /* map those to global */ 1927 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1928 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1929 PetscCall(PetscSFSetFromOptions(sf)); 1930 PetscCall(PetscArrayzero(o_nnz, na)); 1931 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1932 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1933 PetscCall(PetscSFDestroy(&sf)); 1934 1935 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1936 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1937 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1938 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1939 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1940 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1941 } else { 1942 B = *matout; 1943 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1944 } 1945 1946 b = (Mat_MPIAIJ *)B->data; 1947 A_diag = a->A; 1948 B_diag = &b->A; 1949 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1950 A_diag_ncol = A_diag->cmap->N; 1951 B_diag_ilen = sub_B_diag->ilen; 1952 B_diag_i = sub_B_diag->i; 1953 1954 /* Set ilen for diagonal of B */ 1955 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1956 1957 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1958 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1959 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1960 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1961 1962 /* copy over the B part */ 1963 PetscCall(PetscMalloc1(bi[mb], &cols)); 1964 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1965 pbv = bv; 1966 row = A->rmap->rstart; 1967 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1968 cols_tmp = cols; 1969 for (i = 0; i < mb; i++) { 1970 ncol = bi[i + 1] - bi[i]; 1971 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1972 row++; 1973 if (pbv) pbv += ncol; 1974 if (cols_tmp) cols_tmp += ncol; 1975 } 1976 PetscCall(PetscFree(cols)); 1977 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1978 1979 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1980 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1981 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1982 *matout = B; 1983 } else { 1984 PetscCall(MatHeaderMerge(A, &B)); 1985 } 1986 PetscFunctionReturn(PETSC_SUCCESS); 1987 } 1988 1989 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1990 { 1991 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1992 Mat a = aij->A, b = aij->B; 1993 PetscInt s1, s2, s3; 1994 1995 PetscFunctionBegin; 1996 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1997 if (rr) { 1998 PetscCall(VecGetLocalSize(rr, &s1)); 1999 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 2000 /* Overlap communication with computation. */ 2001 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2002 } 2003 if (ll) { 2004 PetscCall(VecGetLocalSize(ll, &s1)); 2005 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 2006 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 2007 } 2008 /* scale the diagonal block */ 2009 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2010 2011 if (rr) { 2012 /* Do a scatter end and then right scale the off-diagonal block */ 2013 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2014 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2015 } 2016 PetscFunctionReturn(PETSC_SUCCESS); 2017 } 2018 2019 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2020 { 2021 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2022 2023 PetscFunctionBegin; 2024 PetscCall(MatSetUnfactored(a->A)); 2025 PetscFunctionReturn(PETSC_SUCCESS); 2026 } 2027 2028 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2029 { 2030 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2031 Mat a, b, c, d; 2032 PetscBool flg; 2033 2034 PetscFunctionBegin; 2035 a = matA->A; 2036 b = matA->B; 2037 c = matB->A; 2038 d = matB->B; 2039 2040 PetscCall(MatEqual(a, c, &flg)); 2041 if (flg) PetscCall(MatEqual(b, d, &flg)); 2042 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2047 { 2048 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2049 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2050 2051 PetscFunctionBegin; 2052 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2053 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2054 /* because of the column compression in the off-processor part of the matrix a->B, 2055 the number of columns in a->B and b->B may be different, hence we cannot call 2056 the MatCopy() directly on the two parts. If need be, we can provide a more 2057 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2058 then copying the submatrices */ 2059 PetscCall(MatCopy_Basic(A, B, str)); 2060 } else { 2061 PetscCall(MatCopy(a->A, b->A, str)); 2062 PetscCall(MatCopy(a->B, b->B, str)); 2063 } 2064 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2065 PetscFunctionReturn(PETSC_SUCCESS); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2073 { 2074 PetscInt i, j, k, nzx, nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i = 0; i < m; i++) { 2079 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2080 nzx = xi[i + 1] - xi[i]; 2081 nzy = yi[i + 1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2084 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k < nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2095 { 2096 PetscInt m = Y->rmap->N; 2097 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2098 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2099 2100 PetscFunctionBegin; 2101 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2102 PetscFunctionReturn(PETSC_SUCCESS); 2103 } 2104 2105 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2106 { 2107 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2108 2109 PetscFunctionBegin; 2110 if (str == SAME_NONZERO_PATTERN) { 2111 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2112 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2113 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2114 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2115 } else { 2116 Mat B; 2117 PetscInt *nnz_d, *nnz_o; 2118 2119 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2120 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2121 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2122 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2123 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2124 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2125 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2126 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2127 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2128 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2129 PetscCall(MatHeaderMerge(Y, &B)); 2130 PetscCall(PetscFree(nnz_d)); 
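/* MatHeaderMerge() above has replaced the internals of Y with those of the freshly assembled B (the temporary B is destroyed in the process), so the caller's Y handle now holds the sum with the union sparsity pattern; the nnz_d/nnz_o preallocation scratch arrays are no longer needed and are freed here. */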
2131 PetscCall(PetscFree(nnz_o)); 2132 } 2133 PetscFunctionReturn(PETSC_SUCCESS); 2134 } 2135 2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2137 2138 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2139 { 2140 PetscFunctionBegin; 2141 if (PetscDefined(USE_COMPLEX)) { 2142 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2143 2144 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2145 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2146 } 2147 PetscFunctionReturn(PETSC_SUCCESS); 2148 } 2149 2150 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatRealPart(a->A)); 2156 PetscCall(MatRealPart(a->B)); 2157 PetscFunctionReturn(PETSC_SUCCESS); 2158 } 2159 2160 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2163 2164 PetscFunctionBegin; 2165 PetscCall(MatImaginaryPart(a->A)); 2166 PetscCall(MatImaginaryPart(a->B)); 2167 PetscFunctionReturn(PETSC_SUCCESS); 2168 } 2169 2170 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2171 { 2172 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2173 PetscInt i, *idxb = NULL, m = A->rmap->n; 2174 PetscScalar *va, *vv; 2175 Vec vB, vA; 2176 const PetscScalar *vb; 2177 2178 PetscFunctionBegin; 2179 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2180 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2181 2182 PetscCall(VecGetArrayWrite(vA, &va)); 2183 if (idx) { 2184 for (i = 0; i < m; i++) { 2185 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2186 } 2187 } 2188 2189 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2190 PetscCall(PetscMalloc1(m, &idxb)); 2191 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2192 2193 PetscCall(VecGetArrayWrite(v, &vv)); 2194 PetscCall(VecGetArrayRead(vB, &vb)); 2195 for (i = 0; i < m; i++) { 2196 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2197 vv[i] = vb[i]; 2198 if (idx) idx[i] = a->garray[idxb[i]]; 2199 } else { 2200 vv[i] = va[i]; 2201 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2202 } 2203 } 2204 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2205 PetscCall(VecRestoreArrayWrite(vA, &va)); 2206 PetscCall(VecRestoreArrayRead(vB, &vb)); 2207 PetscCall(PetscFree(idxb)); 2208 PetscCall(VecDestroy(&vA)); 2209 PetscCall(VecDestroy(&vB)); 2210 PetscFunctionReturn(PETSC_SUCCESS); 2211 } 2212 2213 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2214 { 2215 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2216 Vec vB, vA; 2217 2218 PetscFunctionBegin; 2219 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2220 PetscCall(MatGetRowSumAbs(a->A, vA)); 2221 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2222 PetscCall(MatGetRowSumAbs(a->B, vB)); 2223 PetscCall(VecAXPY(vA, 1.0, vB)); 2224 PetscCall(VecDestroy(&vB)); 2225 PetscCall(VecCopy(vA, v)); 2226 PetscCall(VecDestroy(&vA)); 2227 PetscFunctionReturn(PETSC_SUCCESS); 2228 } 2229 2230 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2231 { 2232 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2233 PetscInt m = A->rmap->n, n = A->cmap->n; 2234 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2235 PetscInt *cmap = mat->garray; 2236 PetscInt *diagIdx, *offdiagIdx; 2237 Vec diagV, offdiagV; 2238 PetscScalar *a, *diagA, *offdiagA; 2239 const PetscScalar *ba, *bav; 2240 PetscInt r, j, col, ncols, *bi, *bj; 2241 Mat B = mat->B; 2242 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2243 2244 PetscFunctionBegin; 2245 /* When a process holds entire A and other 
processes have no entry */ 2246 if (A->cmap->N == n) { 2247 PetscCall(VecGetArrayWrite(v, &diagA)); 2248 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2250 PetscCall(VecDestroy(&diagV)); 2251 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2252 PetscFunctionReturn(PETSC_SUCCESS); 2253 } else if (n == 0) { 2254 if (m) { 2255 PetscCall(VecGetArrayWrite(v, &a)); 2256 for (r = 0; r < m; r++) { 2257 a[r] = 0.0; 2258 if (idx) idx[r] = -1; 2259 } 2260 PetscCall(VecRestoreArrayWrite(v, &a)); 2261 } 2262 PetscFunctionReturn(PETSC_SUCCESS); 2263 } 2264 2265 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2266 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2267 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2268 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2269 2270 /* Get offdiagIdx[] for implicit 0.0 */ 2271 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2272 ba = bav; 2273 bi = b->i; 2274 bj = b->j; 2275 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2276 for (r = 0; r < m; r++) { 2277 ncols = bi[r + 1] - bi[r]; 2278 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2279 offdiagA[r] = *ba; 2280 offdiagIdx[r] = cmap[0]; 2281 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2282 offdiagA[r] = 0.0; 2283 2284 /* Find first hole in the cmap */ 2285 for (j = 0; j < ncols; j++) { 2286 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2287 if (col > j && j < cstart) { 2288 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2289 break; 2290 } else if (col > j + n && j >= cstart) { 2291 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2292 break; 2293 } 2294 } 2295 if (j == ncols && ncols < A->cmap->N - n) { 2296 /* a hole is outside compressed Bcols */ 2297 if (ncols == 0) { 2298 if (cstart) { 2299 offdiagIdx[r] = 0; 2300 } else offdiagIdx[r] = cend; 2301 } else { /* ncols > 0 */ 2302 offdiagIdx[r] = cmap[ncols - 1] + 1; 2303 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2304 } 2305 } 2306 } 2307 2308 for (j = 0; j < ncols; j++) { 2309 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2310 offdiagA[r] = *ba; 2311 offdiagIdx[r] = cmap[*bj]; 2312 } 2313 ba++; 2314 bj++; 2315 } 2316 } 2317 2318 PetscCall(VecGetArrayWrite(v, &a)); 2319 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2320 for (r = 0; r < m; ++r) { 2321 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2322 a[r] = diagA[r]; 2323 if (idx) idx[r] = cstart + diagIdx[r]; 2324 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2325 a[r] = diagA[r]; 2326 if (idx) { 2327 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2328 idx[r] = cstart + diagIdx[r]; 2329 } else idx[r] = offdiagIdx[r]; 2330 } 2331 } else { 2332 a[r] = offdiagA[r]; 2333 if (idx) idx[r] = offdiagIdx[r]; 2334 } 2335 } 2336 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2337 PetscCall(VecRestoreArrayWrite(v, &a)); 2338 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2339 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2340 PetscCall(VecDestroy(&diagV)); 2341 PetscCall(VecDestroy(&offdiagV)); 2342 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2343 PetscFunctionReturn(PETSC_SUCCESS); 2344 } 2345 2346 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2347 { 2348 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2349 PetscInt m = A->rmap->n, n = A->cmap->n; 2350 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 
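/* Same approach as MatGetRowMinAbs_MPIAIJ() above: compute the row minima of the diagonal block mat->A (local column numbering) and of the column-compressed off-diagonal block mat->B (columns translated to global numbers through cmap = mat->garray) separately, then merge the two, also accounting for the implicit zeros that the column compression hides in B's rows. */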
2351 PetscInt *cmap = mat->garray; 2352 PetscInt *diagIdx, *offdiagIdx; 2353 Vec diagV, offdiagV; 2354 PetscScalar *a, *diagA, *offdiagA; 2355 const PetscScalar *ba, *bav; 2356 PetscInt r, j, col, ncols, *bi, *bj; 2357 Mat B = mat->B; 2358 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2359 2360 PetscFunctionBegin; 2361 /* When a process holds entire A and other processes have no entry */ 2362 if (A->cmap->N == n) { 2363 PetscCall(VecGetArrayWrite(v, &diagA)); 2364 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2365 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2366 PetscCall(VecDestroy(&diagV)); 2367 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2368 PetscFunctionReturn(PETSC_SUCCESS); 2369 } else if (n == 0) { 2370 if (m) { 2371 PetscCall(VecGetArrayWrite(v, &a)); 2372 for (r = 0; r < m; r++) { 2373 a[r] = PETSC_MAX_REAL; 2374 if (idx) idx[r] = -1; 2375 } 2376 PetscCall(VecRestoreArrayWrite(v, &a)); 2377 } 2378 PetscFunctionReturn(PETSC_SUCCESS); 2379 } 2380 2381 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2382 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2383 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2384 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2385 2386 /* Get offdiagIdx[] for implicit 0.0 */ 2387 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2388 ba = bav; 2389 bi = b->i; 2390 bj = b->j; 2391 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2392 for (r = 0; r < m; r++) { 2393 ncols = bi[r + 1] - bi[r]; 2394 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2395 offdiagA[r] = *ba; 2396 offdiagIdx[r] = cmap[0]; 2397 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2398 offdiagA[r] = 0.0; 2399 2400 /* Find first hole in the cmap */ 2401 for (j = 0; j < ncols; j++) { 2402 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2403 if (col > j && j < cstart) { 2404 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2405 break; 2406 } else if (col > j + n && j >= cstart) { 2407 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2408 break; 2409 } 2410 } 2411 if (j == ncols && ncols < A->cmap->N - n) { 2412 /* a hole is outside compressed Bcols */ 2413 if (ncols == 0) { 2414 if (cstart) { 2415 offdiagIdx[r] = 0; 2416 } else offdiagIdx[r] = cend; 2417 } else { /* ncols > 0 */ 2418 offdiagIdx[r] = cmap[ncols - 1] + 1; 2419 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2420 } 2421 } 2422 } 2423 2424 for (j = 0; j < ncols; j++) { 2425 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2426 offdiagA[r] = *ba; 2427 offdiagIdx[r] = cmap[*bj]; 2428 } 2429 ba++; 2430 bj++; 2431 } 2432 } 2433 2434 PetscCall(VecGetArrayWrite(v, &a)); 2435 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2436 for (r = 0; r < m; ++r) { 2437 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2438 a[r] = diagA[r]; 2439 if (idx) idx[r] = cstart + diagIdx[r]; 2440 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2441 a[r] = diagA[r]; 2442 if (idx) { 2443 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2444 idx[r] = cstart + diagIdx[r]; 2445 } else idx[r] = offdiagIdx[r]; 2446 } 2447 } else { 2448 a[r] = offdiagA[r]; 2449 if (idx) idx[r] = offdiagIdx[r]; 2450 } 2451 } 2452 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2453 PetscCall(VecRestoreArrayWrite(v, &a)); 2454 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2455 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2456 PetscCall(VecDestroy(&diagV)); 2457 
PetscCall(VecDestroy(&offdiagV)); 2458 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2459 PetscFunctionReturn(PETSC_SUCCESS); 2460 } 2461 2462 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2463 { 2464 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2465 PetscInt m = A->rmap->n, n = A->cmap->n; 2466 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2467 PetscInt *cmap = mat->garray; 2468 PetscInt *diagIdx, *offdiagIdx; 2469 Vec diagV, offdiagV; 2470 PetscScalar *a, *diagA, *offdiagA; 2471 const PetscScalar *ba, *bav; 2472 PetscInt r, j, col, ncols, *bi, *bj; 2473 Mat B = mat->B; 2474 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2475 2476 PetscFunctionBegin; 2477 /* When a process holds entire A and other processes have no entry */ 2478 if (A->cmap->N == n) { 2479 PetscCall(VecGetArrayWrite(v, &diagA)); 2480 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2481 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2482 PetscCall(VecDestroy(&diagV)); 2483 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2484 PetscFunctionReturn(PETSC_SUCCESS); 2485 } else if (n == 0) { 2486 if (m) { 2487 PetscCall(VecGetArrayWrite(v, &a)); 2488 for (r = 0; r < m; r++) { 2489 a[r] = PETSC_MIN_REAL; 2490 if (idx) idx[r] = -1; 2491 } 2492 PetscCall(VecRestoreArrayWrite(v, &a)); 2493 } 2494 PetscFunctionReturn(PETSC_SUCCESS); 2495 } 2496 2497 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2498 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2499 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2500 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2501 2502 /* Get offdiagIdx[] for implicit 0.0 */ 2503 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2504 ba = bav; 2505 bi = b->i; 2506 bj = b->j; 2507 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2508 for (r = 0; r < m; r++) { 2509 ncols = bi[r + 1] - bi[r]; 2510 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2511 offdiagA[r] = *ba; 2512 offdiagIdx[r] = cmap[0]; 2513 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2514 offdiagA[r] = 0.0; 2515 2516 /* Find first hole in the cmap */ 2517 for (j = 0; j < ncols; j++) { 2518 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2519 if (col > j && j < cstart) { 2520 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2521 break; 2522 } else if (col > j + n && j >= cstart) { 2523 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2524 break; 2525 } 2526 } 2527 if (j == ncols && ncols < A->cmap->N - n) { 2528 /* a hole is outside compressed Bcols */ 2529 if (ncols == 0) { 2530 if (cstart) { 2531 offdiagIdx[r] = 0; 2532 } else offdiagIdx[r] = cend; 2533 } else { /* ncols > 0 */ 2534 offdiagIdx[r] = cmap[ncols - 1] + 1; 2535 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2536 } 2537 } 2538 } 2539 2540 for (j = 0; j < ncols; j++) { 2541 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2542 offdiagA[r] = *ba; 2543 offdiagIdx[r] = cmap[*bj]; 2544 } 2545 ba++; 2546 bj++; 2547 } 2548 } 2549 2550 PetscCall(VecGetArrayWrite(v, &a)); 2551 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2552 for (r = 0; r < m; ++r) { 2553 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2554 a[r] = diagA[r]; 2555 if (idx) idx[r] = cstart + diagIdx[r]; 2556 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2557 a[r] = diagA[r]; 2558 if (idx) { 2559 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2560 idx[r] = cstart + diagIdx[r]; 2561 } else idx[r] = offdiagIdx[r]; 2562 } 2563 } 
else { 2564 a[r] = offdiagA[r]; 2565 if (idx) idx[r] = offdiagIdx[r]; 2566 } 2567 } 2568 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2569 PetscCall(VecRestoreArrayWrite(v, &a)); 2570 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2571 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2572 PetscCall(VecDestroy(&diagV)); 2573 PetscCall(VecDestroy(&offdiagV)); 2574 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2575 PetscFunctionReturn(PETSC_SUCCESS); 2576 } 2577 2578 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2579 { 2580 Mat *dummy; 2581 2582 PetscFunctionBegin; 2583 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2584 *newmat = *dummy; 2585 PetscCall(PetscFree(dummy)); 2586 PetscFunctionReturn(PETSC_SUCCESS); 2587 } 2588 2589 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2590 { 2591 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2592 2593 PetscFunctionBegin; 2594 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2595 A->factorerrortype = a->A->factorerrortype; 2596 PetscFunctionReturn(PETSC_SUCCESS); 2597 } 2598 2599 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2600 { 2601 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2602 2603 PetscFunctionBegin; 2604 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2605 PetscCall(MatSetRandom(aij->A, rctx)); 2606 if (x->assembled) { 2607 PetscCall(MatSetRandom(aij->B, rctx)); 2608 } else { 2609 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2610 } 2611 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2612 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2613 PetscFunctionReturn(PETSC_SUCCESS); 2614 } 2615 2616 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2617 { 2618 PetscFunctionBegin; 2619 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2620 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2621 PetscFunctionReturn(PETSC_SUCCESS); 2622 } 2623 2624 /*@ 2625 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2626 2627 Not Collective 2628 2629 Input Parameter: 2630 . A - the matrix 2631 2632 Output Parameter: 2633 . 
nz - the number of nonzeros 2634 2635 Level: advanced 2636 2637 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2638 @*/ 2639 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2640 { 2641 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2642 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2643 PetscBool isaij; 2644 2645 PetscFunctionBegin; 2646 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2647 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2648 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2649 PetscFunctionReturn(PETSC_SUCCESS); 2650 } 2651 2652 /*@ 2653 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2654 2655 Collective 2656 2657 Input Parameters: 2658 + A - the matrix 2659 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2660 2661 Level: advanced 2662 2663 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2664 @*/ 2665 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2666 { 2667 PetscFunctionBegin; 2668 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2669 PetscFunctionReturn(PETSC_SUCCESS); 2670 } 2671 2672 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2673 { 2674 PetscBool sc = PETSC_FALSE, flg; 2675 2676 PetscFunctionBegin; 2677 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2678 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2679 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2680 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2681 PetscOptionsHeadEnd(); 2682 PetscFunctionReturn(PETSC_SUCCESS); 2683 } 2684 2685 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2686 { 2687 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2688 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2689 2690 PetscFunctionBegin; 2691 if (!Y->preallocated) { 2692 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2693 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2694 PetscInt nonew = aij->nonew; 2695 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2696 aij->nonew = nonew; 2697 } 2698 PetscCall(MatShift_Basic(Y, a)); 2699 PetscFunctionReturn(PETSC_SUCCESS); 2700 } 2701 2702 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2703 { 2704 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2705 2706 PetscFunctionBegin; 2707 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2708 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2709 if (d) { 2710 PetscInt rstart; 2711 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2712 *d += rstart; 2713 } 2714 PetscFunctionReturn(PETSC_SUCCESS); 2715 } 2716 2717 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2718 { 2719 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2720 2721 PetscFunctionBegin; 2722 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2723 PetscFunctionReturn(PETSC_SUCCESS); 2724 } 2725 2726 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2727 { 2728 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2729 2730 PetscFunctionBegin; 2731 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2732 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2733 PetscFunctionReturn(PETSC_SUCCESS); 2734 } 2735 2736 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2737 MatGetRow_MPIAIJ, 2738 MatRestoreRow_MPIAIJ, 2739 MatMult_MPIAIJ, 2740 /* 4*/ MatMultAdd_MPIAIJ, 2741 MatMultTranspose_MPIAIJ, 2742 MatMultTransposeAdd_MPIAIJ, 2743 NULL, 2744 NULL, 2745 NULL, 2746 /*10*/ NULL, 2747 NULL, 2748 NULL, 2749 MatSOR_MPIAIJ, 2750 MatTranspose_MPIAIJ, 2751 /*15*/ MatGetInfo_MPIAIJ, 2752 MatEqual_MPIAIJ, 2753 MatGetDiagonal_MPIAIJ, 2754 MatDiagonalScale_MPIAIJ, 2755 MatNorm_MPIAIJ, 2756 /*20*/ MatAssemblyBegin_MPIAIJ, 2757 MatAssemblyEnd_MPIAIJ, 2758 MatSetOption_MPIAIJ, 2759 MatZeroEntries_MPIAIJ, 2760 /*24*/ MatZeroRows_MPIAIJ, 2761 NULL, 2762 NULL, 2763 NULL, 2764 NULL, 2765 /*29*/ MatSetUp_MPI_Hash, 2766 NULL, 2767 NULL, 2768 MatGetDiagonalBlock_MPIAIJ, 2769 NULL, 2770 /*34*/ MatDuplicate_MPIAIJ, 2771 NULL, 2772 NULL, 2773 NULL, 2774 NULL, 2775 /*39*/ MatAXPY_MPIAIJ, 2776 MatCreateSubMatrices_MPIAIJ, 2777 MatIncreaseOverlap_MPIAIJ, 2778 MatGetValues_MPIAIJ, 2779 MatCopy_MPIAIJ, 2780 /*44*/ MatGetRowMax_MPIAIJ, 2781 MatScale_MPIAIJ, 2782 MatShift_MPIAIJ, 2783 MatDiagonalSet_MPIAIJ, 2784 MatZeroRowsColumns_MPIAIJ, 2785 /*49*/ MatSetRandom_MPIAIJ, 2786 MatGetRowIJ_MPIAIJ, 2787 MatRestoreRowIJ_MPIAIJ, 2788 NULL, 2789 NULL, 2790 /*54*/ MatFDColoringCreate_MPIXAIJ, 2791 NULL, 2792 MatSetUnfactored_MPIAIJ, 2793 MatPermute_MPIAIJ, 2794 NULL, 2795 /*59*/ MatCreateSubMatrix_MPIAIJ, 2796 MatDestroy_MPIAIJ, 2797 MatView_MPIAIJ, 2798 NULL, 2799 NULL, 2800 /*64*/ NULL, 2801 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2802 NULL, 2803 NULL, 2804 NULL, 2805 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2806 MatGetRowMinAbs_MPIAIJ, 2807 NULL, 2808 NULL, 2809 NULL, 2810 NULL, 2811 /*75*/ MatFDColoringApply_AIJ, 2812 MatSetFromOptions_MPIAIJ, 2813 NULL, 2814 NULL, 2815 MatFindZeroDiagonals_MPIAIJ, 2816 /*80*/ NULL, 2817 NULL, 2818 NULL, 2819 /*83*/ MatLoad_MPIAIJ, 2820 NULL, 2821 NULL, 2822 NULL, 2823 NULL, 2824 NULL, 2825 /*89*/ NULL, 2826 NULL, 2827 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2828 NULL, 2829 NULL, 2830 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2831 NULL, 2832 NULL, 2833 
NULL, 2834 MatBindToCPU_MPIAIJ, 2835 /*99*/ MatProductSetFromOptions_MPIAIJ, 2836 NULL, 2837 NULL, 2838 MatConjugate_MPIAIJ, 2839 NULL, 2840 /*104*/ MatSetValuesRow_MPIAIJ, 2841 MatRealPart_MPIAIJ, 2842 MatImaginaryPart_MPIAIJ, 2843 NULL, 2844 NULL, 2845 /*109*/ NULL, 2846 NULL, 2847 MatGetRowMin_MPIAIJ, 2848 NULL, 2849 MatMissingDiagonal_MPIAIJ, 2850 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2851 NULL, 2852 MatGetGhosts_MPIAIJ, 2853 NULL, 2854 NULL, 2855 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2856 NULL, 2857 NULL, 2858 NULL, 2859 MatGetMultiProcBlock_MPIAIJ, 2860 /*124*/ MatFindNonzeroRows_MPIAIJ, 2861 MatGetColumnReductions_MPIAIJ, 2862 MatInvertBlockDiagonal_MPIAIJ, 2863 MatInvertVariableBlockDiagonal_MPIAIJ, 2864 MatCreateSubMatricesMPI_MPIAIJ, 2865 /*129*/ NULL, 2866 NULL, 2867 NULL, 2868 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2869 NULL, 2870 /*134*/ NULL, 2871 NULL, 2872 NULL, 2873 NULL, 2874 NULL, 2875 /*139*/ MatSetBlockSizes_MPIAIJ, 2876 NULL, 2877 NULL, 2878 MatFDColoringSetUp_MPIXAIJ, 2879 MatFindOffBlockDiagonalEntries_MPIAIJ, 2880 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2881 /*145*/ NULL, 2882 NULL, 2883 NULL, 2884 MatCreateGraph_Simple_AIJ, 2885 NULL, 2886 /*150*/ NULL, 2887 MatEliminateZeros_MPIAIJ, 2888 MatGetRowSumAbs_MPIAIJ, 2889 NULL, 2890 NULL, 2891 /*155*/ NULL, 2892 MatCopyHashToXAIJ_MPI_Hash}; 2893 2894 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2895 { 2896 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2897 2898 PetscFunctionBegin; 2899 PetscCall(MatStoreValues(aij->A)); 2900 PetscCall(MatStoreValues(aij->B)); 2901 PetscFunctionReturn(PETSC_SUCCESS); 2902 } 2903 2904 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2905 { 2906 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2907 2908 PetscFunctionBegin; 2909 PetscCall(MatRetrieveValues(aij->A)); 2910 PetscCall(MatRetrieveValues(aij->B)); 2911 PetscFunctionReturn(PETSC_SUCCESS); 2912 } 2913 2914 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2915 { 2916 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2917 PetscMPIInt size; 2918 2919 PetscFunctionBegin; 2920 if (B->hash_active) { 2921 B->ops[0] = b->cops; 2922 B->hash_active = PETSC_FALSE; 2923 } 2924 PetscCall(PetscLayoutSetUp(B->rmap)); 2925 PetscCall(PetscLayoutSetUp(B->cmap)); 2926 2927 #if defined(PETSC_USE_CTABLE) 2928 PetscCall(PetscHMapIDestroy(&b->colmap)); 2929 #else 2930 PetscCall(PetscFree(b->colmap)); 2931 #endif 2932 PetscCall(PetscFree(b->garray)); 2933 PetscCall(VecDestroy(&b->lvec)); 2934 PetscCall(VecScatterDestroy(&b->Mvctx)); 2935 2936 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2937 2938 MatSeqXAIJGetOptions_Private(b->B); 2939 PetscCall(MatDestroy(&b->B)); 2940 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2941 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2942 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2943 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2944 MatSeqXAIJRestoreOptions_Private(b->B); 2945 2946 MatSeqXAIJGetOptions_Private(b->A); 2947 PetscCall(MatDestroy(&b->A)); 2948 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2949 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2950 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2951 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2952 MatSeqXAIJRestoreOptions_Private(b->A); 2953 2954 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2955 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2956 B->preallocated = PETSC_TRUE; 2957 B->was_assembled = PETSC_FALSE; 2958 B->assembled = PETSC_FALSE; 2959 PetscFunctionReturn(PETSC_SUCCESS); 2960 } 2961 2962 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2963 { 2964 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2965 2966 PetscFunctionBegin; 2967 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2968 PetscCall(PetscLayoutSetUp(B->rmap)); 2969 PetscCall(PetscLayoutSetUp(B->cmap)); 2970 2971 #if defined(PETSC_USE_CTABLE) 2972 PetscCall(PetscHMapIDestroy(&b->colmap)); 2973 #else 2974 PetscCall(PetscFree(b->colmap)); 2975 #endif 2976 PetscCall(PetscFree(b->garray)); 2977 PetscCall(VecDestroy(&b->lvec)); 2978 PetscCall(VecScatterDestroy(&b->Mvctx)); 2979 2980 PetscCall(MatResetPreallocation(b->A)); 2981 PetscCall(MatResetPreallocation(b->B)); 2982 B->preallocated = PETSC_TRUE; 2983 B->was_assembled = PETSC_FALSE; 2984 B->assembled = PETSC_FALSE; 2985 PetscFunctionReturn(PETSC_SUCCESS); 2986 } 2987 2988 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2989 { 2990 Mat mat; 2991 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2992 2993 PetscFunctionBegin; 2994 *newmat = NULL; 2995 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2996 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2997 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2998 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2999 a = (Mat_MPIAIJ *)mat->data; 3000 3001 mat->factortype = matin->factortype; 3002 mat->assembled = matin->assembled; 3003 mat->insertmode = NOT_SET_VALUES; 3004 3005 a->size = oldmat->size; 3006 a->rank = oldmat->rank; 3007 a->donotstash = oldmat->donotstash; 3008 a->roworiented = oldmat->roworiented; 3009 a->rowindices = NULL; 3010 a->rowvalues = NULL; 3011 a->getrowactive = PETSC_FALSE; 3012 3013 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3014 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3015 if (matin->hash_active) { 3016 PetscCall(MatSetUp(mat)); 3017 } else { 3018 mat->preallocated = matin->preallocated; 3019 if (oldmat->colmap) { 3020 #if defined(PETSC_USE_CTABLE) 3021 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3022 #else 3023 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3024 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3025 #endif 3026 } else a->colmap = NULL; 3027 if (oldmat->garray) { 3028 PetscInt len; 3029 len = oldmat->B->cmap->n; 3030 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3031 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3032 } else a->garray = NULL; 3033 3034 /* It may happen MatDuplicate is called with a non-assembled matrix 3035 In fact, MatDuplicate only requires the matrix to be preallocated 3036 This may happen inside a DMCreateMatrix_Shell */ 3037 if (oldmat->lvec) 
PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3038 if (oldmat->Mvctx) { 3039 a->Mvctx = oldmat->Mvctx; 3040 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3041 } 3042 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3043 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3044 } 3045 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3046 *newmat = mat; 3047 PetscFunctionReturn(PETSC_SUCCESS); 3048 } 3049 3050 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3051 { 3052 PetscBool isbinary, ishdf5; 3053 3054 PetscFunctionBegin; 3055 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3056 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3057 /* force binary viewer to load .info file if it has not yet done so */ 3058 PetscCall(PetscViewerSetUp(viewer)); 3059 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3060 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3061 if (isbinary) { 3062 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3063 } else if (ishdf5) { 3064 #if defined(PETSC_HAVE_HDF5) 3065 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3066 #else 3067 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3068 #endif 3069 } else { 3070 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3071 } 3072 PetscFunctionReturn(PETSC_SUCCESS); 3073 } 3074 3075 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3076 { 3077 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3078 PetscInt *rowidxs, *colidxs; 3079 PetscScalar *matvals; 3080 3081 PetscFunctionBegin; 3082 PetscCall(PetscViewerSetUp(viewer)); 3083 3084 /* read in matrix header */ 3085 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3086 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3087 M = header[1]; 3088 N = header[2]; 3089 nz = header[3]; 3090 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3091 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3092 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3093 3094 /* set block sizes from the viewer's .info file */ 3095 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3096 /* set global sizes if not set already */ 3097 if (mat->rmap->N < 0) mat->rmap->N = M; 3098 if (mat->cmap->N < 0) mat->cmap->N = N; 3099 PetscCall(PetscLayoutSetUp(mat->rmap)); 3100 PetscCall(PetscLayoutSetUp(mat->cmap)); 3101 3102 /* check if the matrix sizes are correct */ 3103 PetscCall(MatGetSize(mat, &rows, &cols)); 3104 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3105 3106 /* read in row lengths and build row indices */ 3107 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3108 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3109 
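  /* The reads below assume the usual PETSc binary layout: after the 4-entry header come the
     per-row nonzero counts for all global rows, then all column indices, then all values.
     Each rank reads the counts for its m local rows here and turns them into CSR row offsets
     via the prefix sum that follows. */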
PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3110 rowidxs[0] = 0; 3111 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3112 if (nz != PETSC_INT_MAX) { 3113 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3114 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3115 } 3116 3117 /* read in column indices and matrix values */ 3118 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3119 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3120 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3121 /* store matrix indices and values */ 3122 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3123 PetscCall(PetscFree(rowidxs)); 3124 PetscCall(PetscFree2(colidxs, matvals)); 3125 PetscFunctionReturn(PETSC_SUCCESS); 3126 } 3127 3128 /* Not scalable because of ISAllGather() unless getting all columns. */ 3129 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3130 { 3131 IS iscol_local; 3132 PetscBool isstride; 3133 PetscMPIInt lisstride = 0, gisstride; 3134 3135 PetscFunctionBegin; 3136 /* check if we are grabbing all columns*/ 3137 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3138 3139 if (isstride) { 3140 PetscInt start, len, mstart, mlen; 3141 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3142 PetscCall(ISGetLocalSize(iscol, &len)); 3143 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3144 if (mstart == start && mlen - mstart == len) lisstride = 1; 3145 } 3146 3147 PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3148 if (gisstride) { 3149 PetscInt N; 3150 PetscCall(MatGetSize(mat, NULL, &N)); 3151 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3152 PetscCall(ISSetIdentity(iscol_local)); 3153 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3154 } else { 3155 PetscInt cbs; 3156 PetscCall(ISGetBlockSize(iscol, &cbs)); 3157 PetscCall(ISAllGather(iscol, &iscol_local)); 3158 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3159 } 3160 3161 *isseq = iscol_local; 3162 PetscFunctionReturn(PETSC_SUCCESS); 3163 } 3164 3165 /* 3166 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3167 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3168 3169 Input Parameters: 3170 + mat - matrix 3171 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3172 i.e., mat->rstart <= isrow[i] < mat->rend 3173 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3174 i.e., mat->cstart <= iscol[i] < mat->cend 3175 3176 Output Parameters: 3177 + isrow_d - sequential row index set for retrieving mat->A 3178 . iscol_d - sequential column index set for retrieving mat->A 3179 . 
iscol_o - sequential column index set for retrieving mat->B 3180 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3181 */ 3182 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3183 { 3184 Vec x, cmap; 3185 const PetscInt *is_idx; 3186 PetscScalar *xarray, *cmaparray; 3187 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3188 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3189 Mat B = a->B; 3190 Vec lvec = a->lvec, lcmap; 3191 PetscInt i, cstart, cend, Bn = B->cmap->N; 3192 MPI_Comm comm; 3193 VecScatter Mvctx = a->Mvctx; 3194 3195 PetscFunctionBegin; 3196 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3197 PetscCall(ISGetLocalSize(iscol, &ncols)); 3198 3199 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3200 PetscCall(MatCreateVecs(mat, &x, NULL)); 3201 PetscCall(VecSet(x, -1.0)); 3202 PetscCall(VecDuplicate(x, &cmap)); 3203 PetscCall(VecSet(cmap, -1.0)); 3204 3205 /* Get start indices */ 3206 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3207 isstart -= ncols; 3208 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3209 3210 PetscCall(ISGetIndices(iscol, &is_idx)); 3211 PetscCall(VecGetArray(x, &xarray)); 3212 PetscCall(VecGetArray(cmap, &cmaparray)); 3213 PetscCall(PetscMalloc1(ncols, &idx)); 3214 for (i = 0; i < ncols; i++) { 3215 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3216 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3217 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3218 } 3219 PetscCall(VecRestoreArray(x, &xarray)); 3220 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3221 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3222 3223 /* Get iscol_d */ 3224 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3225 PetscCall(ISGetBlockSize(iscol, &i)); 3226 PetscCall(ISSetBlockSize(*iscol_d, i)); 3227 3228 /* Get isrow_d */ 3229 PetscCall(ISGetLocalSize(isrow, &m)); 3230 rstart = mat->rmap->rstart; 3231 PetscCall(PetscMalloc1(m, &idx)); 3232 PetscCall(ISGetIndices(isrow, &is_idx)); 3233 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3234 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3235 3236 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3237 PetscCall(ISGetBlockSize(isrow, &i)); 3238 PetscCall(ISSetBlockSize(*isrow_d, i)); 3239 3240 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3241 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3242 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3243 3244 PetscCall(VecDuplicate(lvec, &lcmap)); 3245 3246 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3247 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3248 3249 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3250 /* off-process column indices */ 3251 count = 0; 3252 PetscCall(PetscMalloc1(Bn, &idx)); 3253 PetscCall(PetscMalloc1(Bn, &cmap1)); 3254 3255 PetscCall(VecGetArray(lvec, &xarray)); 3256 PetscCall(VecGetArray(lcmap, &cmaparray)); 3257 for (i = 0; i < Bn; i++) { 3258 if (PetscRealPart(xarray[i]) > -1.0) { 3259 idx[count] = i; /* local column index in off-diagonal part B */ 3260 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3261 count++; 
3262 } 3263 } 3264 PetscCall(VecRestoreArray(lvec, &xarray)); 3265 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3266 3267 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3268 /* cannot ensure iscol_o has same blocksize as iscol! */ 3269 3270 PetscCall(PetscFree(idx)); 3271 *garray = cmap1; 3272 3273 PetscCall(VecDestroy(&x)); 3274 PetscCall(VecDestroy(&cmap)); 3275 PetscCall(VecDestroy(&lcmap)); 3276 PetscFunctionReturn(PETSC_SUCCESS); 3277 } 3278 3279 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3280 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3281 { 3282 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3283 Mat M = NULL; 3284 MPI_Comm comm; 3285 IS iscol_d, isrow_d, iscol_o; 3286 Mat Asub = NULL, Bsub = NULL; 3287 PetscInt n; 3288 3289 PetscFunctionBegin; 3290 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3291 3292 if (call == MAT_REUSE_MATRIX) { 3293 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3294 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3295 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3296 3297 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3298 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3299 3300 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3301 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3302 3303 /* Update diagonal and off-diagonal portions of submat */ 3304 asub = (Mat_MPIAIJ *)(*submat)->data; 3305 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3306 PetscCall(ISGetLocalSize(iscol_o, &n)); 3307 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3308 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3309 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3310 3311 } else { /* call == MAT_INITIAL_MATRIX) */ 3312 PetscInt *garray; 3313 PetscInt BsubN; 3314 3315 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3316 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3317 3318 /* Create local submatrices Asub and Bsub */ 3319 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3320 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3321 3322 /* Create submatrix M */ 3323 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3324 3325 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3326 asub = (Mat_MPIAIJ *)M->data; 3327 3328 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3329 n = asub->B->cmap->N; 3330 if (BsubN > n) { 3331 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3332 const PetscInt *idx; 3333 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3334 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3335 3336 PetscCall(PetscMalloc1(n, &idx_new)); 3337 j = 0; 3338 PetscCall(ISGetIndices(iscol_o, &idx)); 3339 for (i = 0; i < n; i++) { 3340 if (j >= BsubN) break; 3341 while (subgarray[i] > garray[j]) j++; 3342 3343 if (subgarray[i] == garray[j]) { 3344 idx_new[i] = idx[j++]; 3345 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3346 } 3347 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3348 3349 PetscCall(ISDestroy(&iscol_o)); 3350 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3351 3352 } else if (BsubN < n) { 3353 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3354 } 3355 3356 PetscCall(PetscFree(garray)); 3357 *submat = M; 3358 3359 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3360 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3361 PetscCall(ISDestroy(&isrow_d)); 3362 3363 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3364 PetscCall(ISDestroy(&iscol_d)); 3365 3366 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3367 PetscCall(ISDestroy(&iscol_o)); 3368 } 3369 PetscFunctionReturn(PETSC_SUCCESS); 3370 } 3371 3372 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3373 { 3374 IS iscol_local = NULL, isrow_d; 3375 PetscInt csize; 3376 PetscInt n, i, j, start, end; 3377 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3378 MPI_Comm comm; 3379 3380 PetscFunctionBegin; 3381 /* If isrow has same processor distribution as mat, 3382 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3383 if (call == MAT_REUSE_MATRIX) { 3384 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3385 if (isrow_d) { 3386 sameRowDist = PETSC_TRUE; 3387 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3388 } else { 3389 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3390 if (iscol_local) { 3391 sameRowDist = PETSC_TRUE; 3392 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3393 } 3394 } 3395 } else { 3396 /* Check if isrow has same processor distribution as mat */ 3397 sameDist[0] = PETSC_FALSE; 3398 
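    /* isrow is taken to match mat's row distribution when every locally stored index lies in this
       rank's ownership range [start, end); an empty local piece trivially matches. The MPI_LAND
       reduction below turns the per-rank answers into a global decision. */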
PetscCall(ISGetLocalSize(isrow, &n)); 3399 if (!n) { 3400 sameDist[0] = PETSC_TRUE; 3401 } else { 3402 PetscCall(ISGetMinMax(isrow, &i, &j)); 3403 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3404 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3405 } 3406 3407 /* Check if iscol has same processor distribution as mat */ 3408 sameDist[1] = PETSC_FALSE; 3409 PetscCall(ISGetLocalSize(iscol, &n)); 3410 if (!n) { 3411 sameDist[1] = PETSC_TRUE; 3412 } else { 3413 PetscCall(ISGetMinMax(iscol, &i, &j)); 3414 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3415 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3416 } 3417 3418 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3419 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3420 sameRowDist = tsameDist[0]; 3421 } 3422 3423 if (sameRowDist) { 3424 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3425 /* isrow and iscol have same processor distribution as mat */ 3426 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3427 PetscFunctionReturn(PETSC_SUCCESS); 3428 } else { /* sameRowDist */ 3429 /* isrow has same processor distribution as mat */ 3430 if (call == MAT_INITIAL_MATRIX) { 3431 PetscBool sorted; 3432 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3433 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3434 PetscCall(ISGetSize(iscol, &i)); 3435 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3436 3437 PetscCall(ISSorted(iscol_local, &sorted)); 3438 if (sorted) { 3439 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3440 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3441 PetscFunctionReturn(PETSC_SUCCESS); 3442 } 3443 } else { /* call == MAT_REUSE_MATRIX */ 3444 IS iscol_sub; 3445 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3446 if (iscol_sub) { 3447 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3448 PetscFunctionReturn(PETSC_SUCCESS); 3449 } 3450 } 3451 } 3452 } 3453 3454 /* General case: iscol -> iscol_local which has global size of iscol */ 3455 if (call == MAT_REUSE_MATRIX) { 3456 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3457 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3458 } else { 3459 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3460 } 3461 3462 PetscCall(ISGetLocalSize(iscol, &csize)); 3463 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3464 3465 if (call == MAT_INITIAL_MATRIX) { 3466 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3467 PetscCall(ISDestroy(&iscol_local)); 3468 } 3469 PetscFunctionReturn(PETSC_SUCCESS); 3470 } 3471 3472 /*@C 3473 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3474 and "off-diagonal" part of the matrix in CSR format. 3475 3476 Collective 3477 3478 Input Parameters: 3479 + comm - MPI communicator 3480 . A - "diagonal" portion of matrix 3481 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3482 - garray - global index of `B` columns 3483 3484 Output Parameter: 3485 . mat - the matrix, with input `A` as its local diagonal matrix 3486 3487 Level: advanced 3488 3489 Notes: 3490 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3491 3492 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3493 3494 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3495 @*/ 3496 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3497 { 3498 Mat_MPIAIJ *maij; 3499 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3500 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3501 const PetscScalar *oa; 3502 Mat Bnew; 3503 PetscInt m, n, N; 3504 MatType mpi_mat_type; 3505 3506 PetscFunctionBegin; 3507 PetscCall(MatCreate(comm, mat)); 3508 PetscCall(MatGetSize(A, &m, &n)); 3509 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3510 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3511 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3512 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3513 3514 /* Get global columns of mat */ 3515 PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3516 3517 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3518 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
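     (Presumably a plain MATSEQAIJ A yields MATMPIAIJ while a device-specific SeqAIJ subclass, such as a
     CUSPARSE or Kokkos one, yields the matching MPI subclass; this is an assumption about what
     MatGetMPIMatType_Private() returns, not something established here.)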
*/ 3519 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3520 PetscCall(MatSetType(*mat, mpi_mat_type)); 3521 3522 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3523 maij = (Mat_MPIAIJ *)(*mat)->data; 3524 3525 (*mat)->preallocated = PETSC_TRUE; 3526 3527 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3528 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3529 3530 /* Set A as diagonal portion of *mat */ 3531 maij->A = A; 3532 3533 nz = oi[m]; 3534 for (i = 0; i < nz; i++) { 3535 col = oj[i]; 3536 oj[i] = garray[col]; 3537 } 3538 3539 /* Set Bnew as off-diagonal portion of *mat */ 3540 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3541 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3542 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3543 bnew = (Mat_SeqAIJ *)Bnew->data; 3544 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3545 maij->B = Bnew; 3546 3547 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3548 3549 b->free_a = PETSC_FALSE; 3550 b->free_ij = PETSC_FALSE; 3551 PetscCall(MatDestroy(&B)); 3552 3553 bnew->free_a = PETSC_TRUE; 3554 bnew->free_ij = PETSC_TRUE; 3555 3556 /* condense columns of maij->B */ 3557 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3558 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3559 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3560 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3561 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3562 PetscFunctionReturn(PETSC_SUCCESS); 3563 } 3564 3565 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3566 3567 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3568 { 3569 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3570 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3571 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3572 Mat M, Msub, B = a->B; 3573 MatScalar *aa; 3574 Mat_SeqAIJ *aij; 3575 PetscInt *garray = a->garray, *colsub, Ncols; 3576 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3577 IS iscol_sub, iscmap; 3578 const PetscInt *is_idx, *cmap; 3579 PetscBool allcolumns = PETSC_FALSE; 3580 MPI_Comm comm; 3581 3582 PetscFunctionBegin; 3583 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3584 if (call == MAT_REUSE_MATRIX) { 3585 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3586 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3587 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3588 3589 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3590 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3591 3592 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3593 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3594 3595 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3596 3597 } else { /* call == MAT_INITIAL_MATRIX) */ 3598 PetscBool 
flg; 3599 3600 PetscCall(ISGetLocalSize(iscol, &n)); 3601 PetscCall(ISGetSize(iscol, &Ncols)); 3602 3603 /* (1) iscol -> nonscalable iscol_local */ 3604 /* Check for special case: each processor gets entire matrix columns */ 3605 PetscCall(ISIdentity(iscol_local, &flg)); 3606 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3607 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3608 if (allcolumns) { 3609 iscol_sub = iscol_local; 3610 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3611 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3612 3613 } else { 3614 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3615 PetscInt *idx, *cmap1, k; 3616 PetscCall(PetscMalloc1(Ncols, &idx)); 3617 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3618 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3619 count = 0; 3620 k = 0; 3621 for (i = 0; i < Ncols; i++) { 3622 j = is_idx[i]; 3623 if (j >= cstart && j < cend) { 3624 /* diagonal part of mat */ 3625 idx[count] = j; 3626 cmap1[count++] = i; /* column index in submat */ 3627 } else if (Bn) { 3628 /* off-diagonal part of mat */ 3629 if (j == garray[k]) { 3630 idx[count] = j; 3631 cmap1[count++] = i; /* column index in submat */ 3632 } else if (j > garray[k]) { 3633 while (j > garray[k] && k < Bn - 1) k++; 3634 if (j == garray[k]) { 3635 idx[count] = j; 3636 cmap1[count++] = i; /* column index in submat */ 3637 } 3638 } 3639 } 3640 } 3641 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3642 3643 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3644 PetscCall(ISGetBlockSize(iscol, &cbs)); 3645 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3646 3647 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3648 } 3649 3650 /* (3) Create sequential Msub */ 3651 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3652 } 3653 3654 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3655 aij = (Mat_SeqAIJ *)Msub->data; 3656 ii = aij->i; 3657 PetscCall(ISGetIndices(iscmap, &cmap)); 3658 3659 /* 3660 m - number of local rows 3661 Ncols - number of columns (same on all processors) 3662 rstart - first row in new global matrix generated 3663 */ 3664 PetscCall(MatGetSize(Msub, &m, NULL)); 3665 3666 if (call == MAT_INITIAL_MATRIX) { 3667 /* (4) Create parallel newmat */ 3668 PetscMPIInt rank, size; 3669 PetscInt csize; 3670 3671 PetscCallMPI(MPI_Comm_size(comm, &size)); 3672 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3673 3674 /* 3675 Determine the number of non-zeros in the diagonal and off-diagonal 3676 portions of the matrix in order to do correct preallocation 3677 */ 3678 3679 /* first get start and end of "diagonal" columns */ 3680 PetscCall(ISGetLocalSize(iscol, &csize)); 3681 if (csize == PETSC_DECIDE) { 3682 PetscCall(ISGetSize(isrow, &mglobal)); 3683 if (mglobal == Ncols) { /* square matrix */ 3684 nlocal = m; 3685 } else { 3686 nlocal = Ncols / size + ((Ncols % size) > rank); 3687 } 3688 } else { 3689 nlocal = csize; 3690 } 3691 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3692 rstart = rend - nlocal; 3693 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3694 3695 /* next, 
compute all the lengths */ 3696 jj = aij->j; 3697 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3698 olens = dlens + m; 3699 for (i = 0; i < m; i++) { 3700 jend = ii[i + 1] - ii[i]; 3701 olen = 0; 3702 dlen = 0; 3703 for (j = 0; j < jend; j++) { 3704 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3705 else dlen++; 3706 jj++; 3707 } 3708 olens[i] = olen; 3709 dlens[i] = dlen; 3710 } 3711 3712 PetscCall(ISGetBlockSize(isrow, &bs)); 3713 PetscCall(ISGetBlockSize(iscol, &cbs)); 3714 3715 PetscCall(MatCreate(comm, &M)); 3716 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3717 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3718 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3719 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3720 PetscCall(PetscFree(dlens)); 3721 3722 } else { /* call == MAT_REUSE_MATRIX */ 3723 M = *newmat; 3724 PetscCall(MatGetLocalSize(M, &i, NULL)); 3725 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3726 PetscCall(MatZeroEntries(M)); 3727 /* 3728 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3729 rather than the slower MatSetValues(). 3730 */ 3731 M->was_assembled = PETSC_TRUE; 3732 M->assembled = PETSC_FALSE; 3733 } 3734 3735 /* (5) Set values of Msub to *newmat */ 3736 PetscCall(PetscMalloc1(count, &colsub)); 3737 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3738 3739 jj = aij->j; 3740 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3741 for (i = 0; i < m; i++) { 3742 row = rstart + i; 3743 nz = ii[i + 1] - ii[i]; 3744 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3745 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3746 jj += nz; 3747 aa += nz; 3748 } 3749 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3750 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3751 3752 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3753 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3754 3755 PetscCall(PetscFree(colsub)); 3756 3757 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3758 if (call == MAT_INITIAL_MATRIX) { 3759 *newmat = M; 3760 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3761 PetscCall(MatDestroy(&Msub)); 3762 3763 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3764 PetscCall(ISDestroy(&iscol_sub)); 3765 3766 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3767 PetscCall(ISDestroy(&iscmap)); 3768 3769 if (iscol_local) { 3770 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3771 PetscCall(ISDestroy(&iscol_local)); 3772 } 3773 } 3774 PetscFunctionReturn(PETSC_SUCCESS); 3775 } 3776 3777 /* 3778 Not great since it makes two copies of the submatrix, first an SeqAIJ 3779 in local and then by concatenating the local matrices the end result. 3780 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3781 3782 This requires a sequential iscol with all indices. 
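    Because every rank holds that sequential index set, the memory use on each rank grows with the
    global number of selected columns, which is what makes this path non-scalable.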
3783 */ 3784 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3785 { 3786 PetscMPIInt rank, size; 3787 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3788 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3789 Mat M, Mreuse; 3790 MatScalar *aa, *vwork; 3791 MPI_Comm comm; 3792 Mat_SeqAIJ *aij; 3793 PetscBool colflag, allcolumns = PETSC_FALSE; 3794 3795 PetscFunctionBegin; 3796 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3797 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3798 PetscCallMPI(MPI_Comm_size(comm, &size)); 3799 3800 /* Check for special case: each processor gets entire matrix columns */ 3801 PetscCall(ISIdentity(iscol, &colflag)); 3802 PetscCall(ISGetLocalSize(iscol, &n)); 3803 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3804 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3805 3806 if (call == MAT_REUSE_MATRIX) { 3807 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3808 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3809 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3810 } else { 3811 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3812 } 3813 3814 /* 3815 m - number of local rows 3816 n - number of columns (same on all processors) 3817 rstart - first row in new global matrix generated 3818 */ 3819 PetscCall(MatGetSize(Mreuse, &m, &n)); 3820 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3821 if (call == MAT_INITIAL_MATRIX) { 3822 aij = (Mat_SeqAIJ *)Mreuse->data; 3823 ii = aij->i; 3824 jj = aij->j; 3825 3826 /* 3827 Determine the number of non-zeros in the diagonal and off-diagonal 3828 portions of the matrix in order to do correct preallocation 3829 */ 3830 3831 /* first get start and end of "diagonal" columns */ 3832 if (csize == PETSC_DECIDE) { 3833 PetscCall(ISGetSize(isrow, &mglobal)); 3834 if (mglobal == n) { /* square matrix */ 3835 nlocal = m; 3836 } else { 3837 nlocal = n / size + ((n % size) > rank); 3838 } 3839 } else { 3840 nlocal = csize; 3841 } 3842 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3843 rstart = rend - nlocal; 3844 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3845 3846 /* next, compute all the lengths */ 3847 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3848 olens = dlens + m; 3849 for (i = 0; i < m; i++) { 3850 jend = ii[i + 1] - ii[i]; 3851 olen = 0; 3852 dlen = 0; 3853 for (j = 0; j < jend; j++) { 3854 if (*jj < rstart || *jj >= rend) olen++; 3855 else dlen++; 3856 jj++; 3857 } 3858 olens[i] = olen; 3859 dlens[i] = dlen; 3860 } 3861 PetscCall(MatCreate(comm, &M)); 3862 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3863 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3864 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3865 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3866 PetscCall(PetscFree(dlens)); 3867 } else { 3868 PetscInt ml, nl; 3869 3870 M = *newmat; 3871 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3872 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3873 PetscCall(MatZeroEntries(M)); 3874 /* 3875 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3876 rather than the slower MatSetValues(). 3877 */ 3878 M->was_assembled = PETSC_TRUE; 3879 M->assembled = PETSC_FALSE; 3880 } 3881 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3882 aij = (Mat_SeqAIJ *)Mreuse->data; 3883 ii = aij->i; 3884 jj = aij->j; 3885 3886 /* trigger copy to CPU if needed */ 3887 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3888 for (i = 0; i < m; i++) { 3889 row = rstart + i; 3890 nz = ii[i + 1] - ii[i]; 3891 cwork = jj; 3892 jj = PetscSafePointerPlusOffset(jj, nz); 3893 vwork = aa; 3894 aa = PetscSafePointerPlusOffset(aa, nz); 3895 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3896 } 3897 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3898 3899 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3900 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3901 *newmat = M; 3902 3903 /* save submatrix used in processor for next request */ 3904 if (call == MAT_INITIAL_MATRIX) { 3905 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3906 PetscCall(MatDestroy(&Mreuse)); 3907 } 3908 PetscFunctionReturn(PETSC_SUCCESS); 3909 } 3910 3911 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3912 { 3913 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3914 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3915 const PetscInt *JJ; 3916 PetscBool nooffprocentries; 3917 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3918 3919 PetscFunctionBegin; 3920 PetscCall(PetscLayoutSetUp(B->rmap)); 3921 PetscCall(PetscLayoutSetUp(B->cmap)); 3922 m = B->rmap->n; 3923 cstart = B->cmap->rstart; 3924 cend = B->cmap->rend; 3925 rstart = B->rmap->rstart; 3926 irstart = Ii[0]; 3927 3928 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3929 3930 if (PetscDefined(USE_DEBUG)) { 3931 for (i = 0; i < m; i++) { 3932 nnz = Ii[i + 1] - Ii[i]; 3933 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3934 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3935 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3936 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3937 } 3938 } 3939 3940 for (i = 0; i < m; i++) { 3941 nnz = Ii[i + 1] - Ii[i]; 3942 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3943 nnz_max = PetscMax(nnz_max, nnz); 3944 d = 0; 3945 for (j = 0; j < nnz; j++) { 3946 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3947 } 3948 d_nnz[i] = d; 3949 o_nnz[i] = nnz - d; 3950 } 3951 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3952 PetscCall(PetscFree2(d_nnz, o_nnz)); 3953 3954 for (i = 0; i < m; i++) { 3955 ii = i + rstart; 3956 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3957 } 3958 nooffprocentries = B->nooffprocentries; 3959 B->nooffprocentries = PETSC_TRUE; 3960 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3961 
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3962 B->nooffprocentries = nooffprocentries; 3963 3964 /* count number of entries below block diagonal */ 3965 PetscCall(PetscFree(Aij->ld)); 3966 PetscCall(PetscCalloc1(m, &ld)); 3967 Aij->ld = ld; 3968 for (i = 0; i < m; i++) { 3969 nnz = Ii[i + 1] - Ii[i]; 3970 j = 0; 3971 while (j < nnz && J[j] < cstart) j++; 3972 ld[i] = j; 3973 if (J) J += nnz; 3974 } 3975 3976 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3977 PetscFunctionReturn(PETSC_SUCCESS); 3978 } 3979 3980 /*@ 3981 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3982 (the default parallel PETSc format). 3983 3984 Collective 3985 3986 Input Parameters: 3987 + B - the matrix 3988 . i - the indices into `j` for the start of each local row (indices start with zero) 3989 . j - the column indices for each local row (indices start with zero) 3990 - v - optional values in the matrix 3991 3992 Level: developer 3993 3994 Notes: 3995 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3996 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3997 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3998 3999 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4000 4001 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 4002 4003 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 4004 4005 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 4006 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4007 4008 The format which is used for the sparse matrix input, is equivalent to a 4009 row-major ordering.. i.e for the following matrix, the input data expected is 4010 as shown 4011 .vb 4012 1 0 0 4013 2 0 3 P0 4014 ------- 4015 4 5 6 P1 4016 4017 Process0 [P0] rows_owned=[0,1] 4018 i = {0,1,3} [size = nrow+1 = 2+1] 4019 j = {0,0,2} [size = 3] 4020 v = {1,2,3} [size = 3] 4021 4022 Process1 [P1] rows_owned=[2] 4023 i = {0,3} [size = nrow+1 = 1+1] 4024 j = {0,1,2} [size = 3] 4025 v = {4,5,6} [size = 3] 4026 .ve 4027 4028 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4029 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4030 @*/ 4031 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4032 { 4033 PetscFunctionBegin; 4034 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4035 PetscFunctionReturn(PETSC_SUCCESS); 4036 } 4037 4038 /*@ 4039 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4040 (the default parallel PETSc format). For good matrix assembly performance 4041 the user should preallocate the matrix storage by setting the parameters 4042 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4043 4044 Collective 4045 4046 Input Parameters: 4047 + B - the matrix 4048 . 
d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e., 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e., 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, and G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per
  local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence the preallocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored.

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on each process.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an m x n matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (m x N) constitutes the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows
  in standard CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
      calculated if `N` is given). For square matrices n is almost always `m`.
. M - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
. N - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
.
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4188 . j - global column indices 4189 - a - optional matrix values 4190 4191 Output Parameter: 4192 . mat - the matrix 4193 4194 Level: intermediate 4195 4196 Notes: 4197 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4198 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4199 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4200 4201 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4202 4203 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4204 4205 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4206 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4207 4208 The format which is used for the sparse matrix input, is equivalent to a 4209 row-major ordering, i.e., for the following matrix, the input data expected is 4210 as shown 4211 .vb 4212 1 0 0 4213 2 0 3 P0 4214 ------- 4215 4 5 6 P1 4216 4217 Process0 [P0] rows_owned=[0,1] 4218 i = {0,1,3} [size = nrow+1 = 2+1] 4219 j = {0,0,2} [size = 3] 4220 v = {1,2,3} [size = 3] 4221 4222 Process1 [P1] rows_owned=[2] 4223 i = {0,3} [size = nrow+1 = 1+1] 4224 j = {0,1,2} [size = 3] 4225 v = {4,5,6} [size = 3] 4226 .ve 4227 4228 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4229 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4230 @*/ 4231 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4232 { 4233 PetscFunctionBegin; 4234 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4235 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4236 PetscCall(MatCreate(comm, mat)); 4237 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4238 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4239 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4240 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4241 PetscFunctionReturn(PETSC_SUCCESS); 4242 } 4243 4244 /*@ 4245 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4246 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4247 from `MatCreateMPIAIJWithArrays()` 4248 4249 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4250 4251 Collective 4252 4253 Input Parameters: 4254 + mat - the matrix 4255 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4256 . n - This value should be the same as the local size used in creating the 4257 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4258 calculated if N is given) For square matrices n is almost always m. 4259 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4260 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4261 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4262 . J - column indices 4263 - v - matrix values 4264 4265 Level: deprecated 4266 4267 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4268 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4269 @*/ 4270 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4271 { 4272 PetscInt nnz, i; 4273 PetscBool nooffprocentries; 4274 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4275 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4276 PetscScalar *ad, *ao; 4277 PetscInt ldi, Iii, md; 4278 const PetscInt *Adi = Ad->i; 4279 PetscInt *ld = Aij->ld; 4280 4281 PetscFunctionBegin; 4282 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4283 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4284 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4285 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4286 4287 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4288 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4289 4290 for (i = 0; i < m; i++) { 4291 if (PetscDefined(USE_DEBUG)) { 4292 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4293 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4294 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4295 } 4296 } 4297 nnz = Ii[i + 1] - Ii[i]; 4298 Iii = Ii[i]; 4299 ldi = ld[i]; 4300 md = Adi[i + 1] - Adi[i]; 4301 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4302 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4303 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4304 ad += md; 4305 ao += nnz - md; 4306 } 4307 nooffprocentries = mat->nooffprocentries; 4308 mat->nooffprocentries = PETSC_TRUE; 4309 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4310 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4311 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4312 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4313 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4314 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4315 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4316 mat->nooffprocentries = nooffprocentries; 4317 PetscFunctionReturn(PETSC_SUCCESS); 4318 } 4319 4320 /*@ 4321 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4322 4323 Collective 4324 4325 Input Parameters: 4326 + mat - the matrix 4327 - v - matrix values, stored by row 4328 4329 Level: intermediate 4330 4331 Notes: 4332 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4333 4334 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4335 4336 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4337 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4338 @*/ 4339 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4340 { 4341 PetscInt nnz, i, m; 4342 PetscBool nooffprocentries; 4343 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4344 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4345 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4346 PetscScalar *ad, *ao; 4347 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4348 PetscInt ldi, Iii, md; 4349 PetscInt *ld = Aij->ld; 4350 4351 PetscFunctionBegin; 4352 m = mat->rmap->n; 4353 4354 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4355 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4356 Iii = 0; 4357 for (i = 0; i < m; i++) { 4358 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4359 ldi = ld[i]; 4360 md = Adi[i + 1] - Adi[i]; 4361 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4362 ad += md; 4363 if (ao) { 4364 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4365 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4366 ao += nnz - md; 4367 } 4368 Iii += nnz; 4369 } 4370 nooffprocentries = mat->nooffprocentries; 4371 mat->nooffprocentries = PETSC_TRUE; 4372 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4373 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4374 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4375 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4376 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4377 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4378 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4379 mat->nooffprocentries = nooffprocentries; 4380 PetscFunctionReturn(PETSC_SUCCESS); 4381 } 4382 4383 /*@ 4384 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4385 (the default parallel PETSc format). For good matrix assembly performance 4386 the user should preallocate the matrix storage by setting the parameters 4387 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4388 4389 Collective 4390 4391 Input Parameters: 4392 + comm - MPI communicator 4393 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4394 This value should be the same as the local size used in creating the 4395 y vector for the matrix-vector product y = Ax. 4396 . n - This value should be the same as the local size used in creating the 4397 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4398 calculated if N is given) For square matrices n is almost always m. 4399 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4400 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4401 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4402 (same value is used for all local rows) 4403 . d_nnz - array containing the number of nonzeros in the various rows of the 4404 DIAGONAL portion of the local submatrix (possibly different for each row) 4405 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4406 The size of this array is equal to the number of local rows, i.e 'm'. 4407 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4408 submatrix (same value is used for all local rows). 4409 - o_nnz - array containing the number of nonzeros in the various rows of the 4410 OFF-DIAGONAL portion of the local submatrix (possibly different for 4411 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4412 structure. The size of this array is equal to the number 4413 of local rows, i.e., 'm'. 4414 4415 Output Parameter: 4416 . A - the matrix 4417 4418 Options Database Keys: 4419 + -mat_no_inode - Do not use inodes 4420 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4421 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4422 See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4423 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4424 4425 Level: intermediate 4426 4427 Notes: 4428 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4429 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4430 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4431 4432 If the *_nnz parameter is given then the *_nz parameter is ignored 4433 4434 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4435 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4436 storage requirements for this matrix. 4437 4438 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4439 processor then it must be used on all processors that share the object for 4440 that argument. 4441 4442 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4443 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4444 4445 The user MUST specify either the local or global matrix dimensions 4446 (possibly both). 4447 4448 The parallel matrix is partitioned across processors such that the 4449 first `m0` rows belong to process 0, the next `m1` rows belong to 4450 process 1, the next `m2` rows belong to process 2, etc., where 4451 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4452 values corresponding to an [m x N] submatrix. 4453 4454 The columns are logically partitioned with the n0 columns belonging 4455 to the 0th partition, the next n1 columns belonging to the next 4456 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4457 4458 The DIAGONAL portion of the local submatrix on any given processor 4459 is the submatrix corresponding to the rows and columns m,n 4460 corresponding to the given processor, i.e., the diagonal matrix on 4461 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4462 etc. The remaining portion of the local submatrix [m x (N-n)] 4463 constitutes the OFF-DIAGONAL portion. The example below better 4464 illustrates this concept. 4465 4466 For a square global matrix we define each processor's diagonal portion 4467 to be its local rows and the corresponding columns (a square submatrix); 4468 each processor's off-diagonal portion encompasses the remainder of the 4469 local matrix (a rectangular submatrix).
4470 4471 If `o_nnz`, `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4472 4473 When calling this routine with a single process communicator, a matrix of 4474 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4475 type of communicator, use the construction mechanism 4476 .vb 4477 MatCreate(..., &A); 4478 MatSetType(A, MATMPIAIJ); 4479 MatSetSizes(A, m, n, M, N); 4480 MatMPIAIJSetPreallocation(A, ...); 4481 .ve 4482 4483 By default, this format uses inodes (identical nodes) when possible. 4484 We search for consecutive rows with the same nonzero structure, thereby 4485 reusing matrix information to achieve increased efficiency. 4486 4487 Example Usage: 4488 Consider the following 8x8 matrix with 34 non-zero values, that is 4489 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4490 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4491 as follows 4492 4493 .vb 4494 1 2 0 | 0 3 0 | 0 4 4495 Proc0 0 5 6 | 7 0 0 | 8 0 4496 9 0 10 | 11 0 0 | 12 0 4497 ------------------------------------- 4498 13 0 14 | 15 16 17 | 0 0 4499 Proc1 0 18 0 | 19 20 21 | 0 0 4500 0 0 0 | 22 23 0 | 24 0 4501 ------------------------------------- 4502 Proc2 25 26 27 | 0 0 28 | 29 0 4503 30 0 0 | 31 32 33 | 0 34 4504 .ve 4505 4506 This can be represented as a collection of submatrices as 4507 4508 .vb 4509 A B C 4510 D E F 4511 G H I 4512 .ve 4513 4514 Where the submatrices A,B,C are owned by proc0, D,E,F are 4515 owned by proc1, G,H,I are owned by proc2. 4516 4517 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4518 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4519 The 'M','N' parameters are 8,8, and have the same values on all procs. 4520 4521 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4522 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4523 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4524 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4525 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4526 matrix, and [DF] as another `MATSEQAIJ` matrix. 4527 4528 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4529 allocated for every row of the local diagonal submatrix, and `o_nz` 4530 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4531 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per local 4532 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4533 In this case, the values of `d_nz`,`o_nz` are 4534 .vb 4535 proc0 d_nz = 2, o_nz = 2 4536 proc1 d_nz = 3, o_nz = 2 4537 proc2 d_nz = 1, o_nz = 4 4538 .ve 4539 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4540 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4541 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4542 34 values. 4543 4544 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4545 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4546 In the above case the values for d_nnz,o_nnz are 4547 .vb 4548 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4549 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4550 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4551 .ve 4552 Here the space allocated is the sum of all the above values, i.e., 34, and 4553 hence the preallocation is perfect.
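   With the `d_nnz`, `o_nnz` values above, the call on proc0 (which owns 3 rows and 3 columns of the 8x8 example) could, for instance, look like
.vb
     PetscInt d_nnz[] = {2, 2, 2}, o_nnz[] = {2, 2, 2}; /* per-row counts for proc0 in the example above */
     Mat      A;

     MatCreateAIJ(comm, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A); /* d_nz and o_nz are ignored because d_nnz and o_nnz are provided */
.ve
   with proc1 and proc2 passing their own local sizes and per-row count arrays on the same communicator `comm`.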
4554 4555 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4556 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4557 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4558 @*/ 4559 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4560 { 4561 PetscMPIInt size; 4562 4563 PetscFunctionBegin; 4564 PetscCall(MatCreate(comm, A)); 4565 PetscCall(MatSetSizes(*A, m, n, M, N)); 4566 PetscCallMPI(MPI_Comm_size(comm, &size)); 4567 if (size > 1) { 4568 PetscCall(MatSetType(*A, MATMPIAIJ)); 4569 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4570 } else { 4571 PetscCall(MatSetType(*A, MATSEQAIJ)); 4572 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4573 } 4574 PetscFunctionReturn(PETSC_SUCCESS); 4575 } 4576 4577 /*MC 4578 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4579 4580 Synopsis: 4581 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4582 4583 Not Collective 4584 4585 Input Parameter: 4586 . A - the `MATMPIAIJ` matrix 4587 4588 Output Parameters: 4589 + Ad - the diagonal portion of the matrix 4590 . Ao - the off-diagonal portion of the matrix 4591 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4592 - ierr - error code 4593 4594 Level: advanced 4595 4596 Note: 4597 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4598 4599 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4600 M*/ 4601 4602 /*MC 4603 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4604 4605 Synopsis: 4606 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4607 4608 Not Collective 4609 4610 Input Parameters: 4611 + A - the `MATMPIAIJ` matrix 4612 . Ad - the diagonal portion of the matrix 4613 . Ao - the off-diagonal portion of the matrix 4614 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4615 - ierr - error code 4616 4617 Level: advanced 4618 4619 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4620 M*/ 4621 4622 /*@C 4623 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4624 4625 Not Collective 4626 4627 Input Parameter: 4628 . A - The `MATMPIAIJ` matrix 4629 4630 Output Parameters: 4631 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4632 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4633 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4634 4635 Level: intermediate 4636 4637 Note: 4638 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4639 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4640 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4641 local column numbers to global column numbers in the original matrix. 4642 4643 Fortran Notes: 4644 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4645 4646 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4647 @*/ 4648 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4649 { 4650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4651 PetscBool flg; 4652 4653 PetscFunctionBegin; 4654 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4655 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4656 if (Ad) *Ad = a->A; 4657 if (Ao) *Ao = a->B; 4658 if (colmap) *colmap = a->garray; 4659 PetscFunctionReturn(PETSC_SUCCESS); 4660 } 4661 4662 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4663 { 4664 PetscInt m, N, i, rstart, nnz, Ii; 4665 PetscInt *indx; 4666 PetscScalar *values; 4667 MatType rootType; 4668 4669 PetscFunctionBegin; 4670 PetscCall(MatGetSize(inmat, &m, &N)); 4671 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4672 PetscInt *dnz, *onz, sum, bs, cbs; 4673 4674 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4675 /* Check sum(n) = N */ 4676 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4677 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4678 4679 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4680 rstart -= m; 4681 4682 MatPreallocateBegin(comm, m, n, dnz, onz); 4683 for (i = 0; i < m; i++) { 4684 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4685 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4686 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4687 } 4688 4689 PetscCall(MatCreate(comm, outmat)); 4690 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4691 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4692 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4693 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4694 PetscCall(MatSetType(*outmat, rootType)); 4695 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4696 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4697 MatPreallocateEnd(dnz, onz); 4698 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4699 } 4700 4701 /* numeric phase */ 4702 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4703 for (i = 0; i < m; i++) { 4704 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4705 Ii = i + rstart; 4706 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4707 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4708 } 4709 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4710 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4711 PetscFunctionReturn(PETSC_SUCCESS); 4712 } 4713 4714 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4715 { 4716 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4717 4718 PetscFunctionBegin; 4719 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4720 PetscCall(PetscFree(merge->id_r)); 4721 PetscCall(PetscFree(merge->len_s)); 4722 
PetscCall(PetscFree(merge->len_r)); 4723 PetscCall(PetscFree(merge->bi)); 4724 PetscCall(PetscFree(merge->bj)); 4725 PetscCall(PetscFree(merge->buf_ri[0])); 4726 PetscCall(PetscFree(merge->buf_ri)); 4727 PetscCall(PetscFree(merge->buf_rj[0])); 4728 PetscCall(PetscFree(merge->buf_rj)); 4729 PetscCall(PetscFree(merge->coi)); 4730 PetscCall(PetscFree(merge->coj)); 4731 PetscCall(PetscFree(merge->owners_co)); 4732 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4733 PetscCall(PetscFree(merge)); 4734 PetscFunctionReturn(PETSC_SUCCESS); 4735 } 4736 4737 #include <../src/mat/utils/freespace.h> 4738 #include <petscbt.h> 4739 4740 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4741 { 4742 MPI_Comm comm; 4743 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4744 PetscMPIInt size, rank, taga, *len_s; 4745 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4746 PetscMPIInt proc, k; 4747 PetscInt **buf_ri, **buf_rj; 4748 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4749 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4750 MPI_Request *s_waits, *r_waits; 4751 MPI_Status *status; 4752 const MatScalar *aa, *a_a; 4753 MatScalar **abuf_r, *ba_i; 4754 Mat_Merge_SeqsToMPI *merge; 4755 PetscContainer container; 4756 4757 PetscFunctionBegin; 4758 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4759 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4760 4761 PetscCallMPI(MPI_Comm_size(comm, &size)); 4762 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4763 4764 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4765 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4766 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4767 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4768 aa = a_a; 4769 4770 bi = merge->bi; 4771 bj = merge->bj; 4772 buf_ri = merge->buf_ri; 4773 buf_rj = merge->buf_rj; 4774 4775 PetscCall(PetscMalloc1(size, &status)); 4776 owners = merge->rowmap->range; 4777 len_s = merge->len_s; 4778 4779 /* send and recv matrix values */ 4780 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4781 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4782 4783 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4784 for (proc = 0, k = 0; proc < size; proc++) { 4785 if (!len_s[proc]) continue; 4786 i = owners[proc]; 4787 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4788 k++; 4789 } 4790 4791 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4792 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4793 PetscCall(PetscFree(status)); 4794 4795 PetscCall(PetscFree(s_waits)); 4796 PetscCall(PetscFree(r_waits)); 4797 4798 /* insert mat values of mpimat */ 4799 PetscCall(PetscMalloc1(N, &ba_i)); 4800 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4801 4802 for (k = 0; k < merge->nrecv; k++) { 4803 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4804 nrows = *buf_ri_k[k]; 4805 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4806 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4807 } 4808 4809 /* set values of ba */ 4810 m = merge->rowmap->n; 4811 for (i = 0; i < m; i++) { 4812 arow = owners[rank] + 
i; 4813 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4814 bnzi = bi[i + 1] - bi[i]; 4815 PetscCall(PetscArrayzero(ba_i, bnzi)); 4816 4817 /* add local non-zero vals of this proc's seqmat into ba */ 4818 anzi = ai[arow + 1] - ai[arow]; 4819 aj = a->j + ai[arow]; 4820 aa = a_a + ai[arow]; 4821 nextaj = 0; 4822 for (j = 0; nextaj < anzi; j++) { 4823 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4824 ba_i[j] += aa[nextaj++]; 4825 } 4826 } 4827 4828 /* add received vals into ba */ 4829 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4830 /* i-th row */ 4831 if (i == *nextrow[k]) { 4832 anzi = *(nextai[k] + 1) - *nextai[k]; 4833 aj = buf_rj[k] + *nextai[k]; 4834 aa = abuf_r[k] + *nextai[k]; 4835 nextaj = 0; 4836 for (j = 0; nextaj < anzi; j++) { 4837 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4838 ba_i[j] += aa[nextaj++]; 4839 } 4840 } 4841 nextrow[k]++; 4842 nextai[k]++; 4843 } 4844 } 4845 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4846 } 4847 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4848 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4849 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4850 4851 PetscCall(PetscFree(abuf_r[0])); 4852 PetscCall(PetscFree(abuf_r)); 4853 PetscCall(PetscFree(ba_i)); 4854 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4855 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4856 PetscFunctionReturn(PETSC_SUCCESS); 4857 } 4858 4859 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4860 { 4861 Mat B_mpi; 4862 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4863 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4864 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4865 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4866 PetscInt len, *dnz, *onz, bs, cbs; 4867 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4868 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4869 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4870 MPI_Status *status; 4871 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4872 PetscBT lnkbt; 4873 Mat_Merge_SeqsToMPI *merge; 4874 PetscContainer container; 4875 4876 PetscFunctionBegin; 4877 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4878 4879 /* make sure it is a PETSc comm */ 4880 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4881 PetscCallMPI(MPI_Comm_size(comm, &size)); 4882 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4883 4884 PetscCall(PetscNew(&merge)); 4885 PetscCall(PetscMalloc1(size, &status)); 4886 4887 /* determine row ownership */ 4888 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4889 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4890 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4891 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4892 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4893 PetscCall(PetscMalloc1(size, &len_si)); 4894 PetscCall(PetscMalloc1(size, &merge->len_s)); 4895 4896 m = merge->rowmap->n; 4897 owners = merge->rowmap->range; 4898 4899 /* determine the number of messages to send, their lengths */ 4900 len_s = merge->len_s; 4901 4902 len = 0; /* length of buf_si[] */ 4903 merge->nsend = 0; 4904 for (PetscMPIInt proc = 0; proc < size; proc++) { 4905 len_si[proc] = 0; 4906 if (proc == rank) { 4907 len_s[proc] = 0; 4908 } else { 4909 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4910 
PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4911 } 4912 if (len_s[proc]) { 4913 merge->nsend++; 4914 nrows = 0; 4915 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4916 if (ai[i + 1] > ai[i]) nrows++; 4917 } 4918 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4919 len += len_si[proc]; 4920 } 4921 } 4922 4923 /* determine the number and length of messages to receive for ij-structure */ 4924 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4925 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4926 4927 /* post the Irecv of j-structure */ 4928 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4929 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4930 4931 /* post the Isend of j-structure */ 4932 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4933 4934 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4935 if (!len_s[proc]) continue; 4936 i = owners[proc]; 4937 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4938 k++; 4939 } 4940 4941 /* receives and sends of j-structure are complete */ 4942 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4943 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4944 4945 /* send and recv i-structure */ 4946 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4947 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4948 4949 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4950 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4951 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4952 if (!len_s[proc]) continue; 4953 /* form outgoing message for i-structure: 4954 buf_si[0]: nrows to be sent 4955 [1:nrows]: row index (global) 4956 [nrows+1:2*nrows+1]: i-structure index 4957 */ 4958 nrows = len_si[proc] / 2 - 1; 4959 buf_si_i = buf_si + nrows + 1; 4960 buf_si[0] = nrows; 4961 buf_si_i[0] = 0; 4962 nrows = 0; 4963 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4964 anzi = ai[i + 1] - ai[i]; 4965 if (anzi) { 4966 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4967 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4968 nrows++; 4969 } 4970 } 4971 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4972 k++; 4973 buf_si += len_si[proc]; 4974 } 4975 4976 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4977 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4978 4979 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4980 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4981 4982 PetscCall(PetscFree(len_si)); 4983 PetscCall(PetscFree(len_ri)); 4984 PetscCall(PetscFree(rj_waits)); 4985 PetscCall(PetscFree2(si_waits, sj_waits)); 4986 PetscCall(PetscFree(ri_waits)); 4987 PetscCall(PetscFree(buf_s)); 4988 PetscCall(PetscFree(status)); 4989 4990 /* compute a local seq matrix in each processor */ 4991 /* allocate bi array and free space for accumulating nonzero column info */ 4992 PetscCall(PetscMalloc1(m + 1, &bi)); 4993 bi[0] = 0; 4994 4995 /* create and initialize a linked list */ 4996 
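  /* lnk is a sorted linked list over global column indices (with bit table lnkbt); it is used below to merge, row by row, the column indices of this process's owned rows of seqmat with those received from other processes */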
nlnk = N + 1; 4997 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4998 4999 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5000 len = ai[owners[rank + 1]] - ai[owners[rank]]; 5001 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 5002 5003 current_space = free_space; 5004 5005 /* determine symbolic info for each local row */ 5006 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5007 5008 for (k = 0; k < merge->nrecv; k++) { 5009 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5010 nrows = *buf_ri_k[k]; 5011 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5012 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5013 } 5014 5015 MatPreallocateBegin(comm, m, n, dnz, onz); 5016 len = 0; 5017 for (i = 0; i < m; i++) { 5018 bnzi = 0; 5019 /* add local non-zero cols of this proc's seqmat into lnk */ 5020 arow = owners[rank] + i; 5021 anzi = ai[arow + 1] - ai[arow]; 5022 aj = a->j + ai[arow]; 5023 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5024 bnzi += nlnk; 5025 /* add received col data into lnk */ 5026 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5027 if (i == *nextrow[k]) { /* i-th row */ 5028 anzi = *(nextai[k] + 1) - *nextai[k]; 5029 aj = buf_rj[k] + *nextai[k]; 5030 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5031 bnzi += nlnk; 5032 nextrow[k]++; 5033 nextai[k]++; 5034 } 5035 } 5036 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5037 5038 /* if free space is not available, make more free space */ 5039 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5040 /* copy data into free space, then initialize lnk */ 5041 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5042 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5043 5044 current_space->array += bnzi; 5045 current_space->local_used += bnzi; 5046 current_space->local_remaining -= bnzi; 5047 5048 bi[i + 1] = bi[i] + bnzi; 5049 } 5050 5051 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5052 5053 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5054 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5055 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5056 5057 /* create symbolic parallel matrix B_mpi */ 5058 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5059 PetscCall(MatCreate(comm, &B_mpi)); 5060 if (n == PETSC_DECIDE) { 5061 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5062 } else { 5063 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5064 } 5065 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5066 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5067 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5068 MatPreallocateEnd(dnz, onz); 5069 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5070 5071 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5072 B_mpi->assembled = PETSC_FALSE; 5073 merge->bi = bi; 5074 merge->bj = bj; 5075 merge->buf_ri = buf_ri; 5076 merge->buf_rj = buf_rj; 5077 merge->coi = NULL; 5078 merge->coj = NULL; 5079 merge->owners_co = NULL; 5080 5081 PetscCall(PetscCommDestroy(&comm)); 5082 5083 /* attach the supporting struct to B_mpi for reuse */ 5084 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5085 
PetscCall(PetscContainerSetPointer(container, merge)); 5086 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5087 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5088 PetscCall(PetscContainerDestroy(&container)); 5089 *mpimat = B_mpi; 5090 5091 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5092 PetscFunctionReturn(PETSC_SUCCESS); 5093 } 5094 5095 /*@ 5096 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5097 matrices from each processor 5098 5099 Collective 5100 5101 Input Parameters: 5102 + comm - the communicators the parallel matrix will live on 5103 . seqmat - the input sequential matrices 5104 . m - number of local rows (or `PETSC_DECIDE`) 5105 . n - number of local columns (or `PETSC_DECIDE`) 5106 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5107 5108 Output Parameter: 5109 . mpimat - the parallel matrix generated 5110 5111 Level: advanced 5112 5113 Note: 5114 The dimensions of the sequential matrix in each processor MUST be the same. 5115 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5116 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5117 5118 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5119 @*/ 5120 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5121 { 5122 PetscMPIInt size; 5123 5124 PetscFunctionBegin; 5125 PetscCallMPI(MPI_Comm_size(comm, &size)); 5126 if (size == 1) { 5127 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5128 if (scall == MAT_INITIAL_MATRIX) { 5129 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5130 } else { 5131 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5132 } 5133 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5134 PetscFunctionReturn(PETSC_SUCCESS); 5135 } 5136 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5137 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5138 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5139 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5140 PetscFunctionReturn(PETSC_SUCCESS); 5141 } 5142 5143 /*@ 5144 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5145 5146 Not Collective 5147 5148 Input Parameter: 5149 . A - the matrix 5150 5151 Output Parameter: 5152 . A_loc - the local sequential matrix generated 5153 5154 Level: developer 5155 5156 Notes: 5157 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5158 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5159 `n` is the global column count obtained with `MatGetSize()` 5160 5161 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5162 5163 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
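   A minimal call sequence, assuming `A` is an already assembled `MATAIJ` matrix, could be
.vb
     Mat A_loc;

     MatAIJGetLocalMat(A, &A_loc);
     /* ... work with the sequential matrix A_loc ... */
     MatDestroy(&A_loc);
.ve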
5164 5165 Destroy the matrix with `MatDestroy()` 5166 5167 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5168 @*/ 5169 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5170 { 5171 PetscBool mpi; 5172 5173 PetscFunctionBegin; 5174 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5175 if (mpi) { 5176 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5177 } else { 5178 *A_loc = A; 5179 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5180 } 5181 PetscFunctionReturn(PETSC_SUCCESS); 5182 } 5183 5184 /*@ 5185 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5186 5187 Not Collective 5188 5189 Input Parameters: 5190 + A - the matrix 5191 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5192 5193 Output Parameter: 5194 . A_loc - the local sequential matrix generated 5195 5196 Level: developer 5197 5198 Notes: 5199 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5200 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5201 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5202 5203 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5204 5205 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5206 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5207 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5208 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
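   For repeated extraction, a typical pattern (assuming `A` is a `MATMPIAIJ` matrix whose numerical values, but not its nonzero pattern, change between the two calls) could be
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
     /* ... the values of A change ... */
     MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);
     MatDestroy(&A_loc);
.ve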
5209 5210 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5211 @*/ 5212 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5213 { 5214 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5215 Mat_SeqAIJ *mat, *a, *b; 5216 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5217 const PetscScalar *aa, *ba, *aav, *bav; 5218 PetscScalar *ca, *cam; 5219 PetscMPIInt size; 5220 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5221 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5222 PetscBool match; 5223 5224 PetscFunctionBegin; 5225 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5226 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5227 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5228 if (size == 1) { 5229 if (scall == MAT_INITIAL_MATRIX) { 5230 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5231 *A_loc = mpimat->A; 5232 } else if (scall == MAT_REUSE_MATRIX) { 5233 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5234 } 5235 PetscFunctionReturn(PETSC_SUCCESS); 5236 } 5237 5238 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5239 a = (Mat_SeqAIJ *)mpimat->A->data; 5240 b = (Mat_SeqAIJ *)mpimat->B->data; 5241 ai = a->i; 5242 aj = a->j; 5243 bi = b->i; 5244 bj = b->j; 5245 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5246 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5247 aa = aav; 5248 ba = bav; 5249 if (scall == MAT_INITIAL_MATRIX) { 5250 PetscCall(PetscMalloc1(1 + am, &ci)); 5251 ci[0] = 0; 5252 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5253 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5254 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5255 k = 0; 5256 for (i = 0; i < am; i++) { 5257 ncols_o = bi[i + 1] - bi[i]; 5258 ncols_d = ai[i + 1] - ai[i]; 5259 /* off-diagonal portion of A */ 5260 for (jo = 0; jo < ncols_o; jo++) { 5261 col = cmap[*bj]; 5262 if (col >= cstart) break; 5263 cj[k] = col; 5264 bj++; 5265 ca[k++] = *ba++; 5266 } 5267 /* diagonal portion of A */ 5268 for (j = 0; j < ncols_d; j++) { 5269 cj[k] = cstart + *aj++; 5270 ca[k++] = *aa++; 5271 } 5272 /* off-diagonal portion of A */ 5273 for (j = jo; j < ncols_o; j++) { 5274 cj[k] = cmap[*bj++]; 5275 ca[k++] = *ba++; 5276 } 5277 } 5278 /* put together the new matrix */ 5279 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5280 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5281 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5282 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5283 mat->free_a = PETSC_TRUE; 5284 mat->free_ij = PETSC_TRUE; 5285 mat->nonew = 0; 5286 } else if (scall == MAT_REUSE_MATRIX) { 5287 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5288 ci = mat->i; 5289 cj = mat->j; 5290 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5291 for (i = 0; i < am; i++) { 5292 /* off-diagonal portion of A */ 5293 ncols_o = bi[i + 1] - bi[i]; 5294 for (jo = 0; jo < ncols_o; jo++) { 5295 col = cmap[*bj]; 5296 if (col >= cstart) break; 5297 *cam++ = *ba++; 5298 bj++; 5299 } 5300 /* diagonal portion of A */ 5301 ncols_d = ai[i + 1] - ai[i]; 5302 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5303 /* off-diagonal portion of A */ 5304 for (j = jo; j < ncols_o; j++) { 5305 *cam++ = *ba++; 5306 bj++; 5307 } 5308 } 5309 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5310 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5311 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5312 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5313 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5314 PetscFunctionReturn(PETSC_SUCCESS); 5315 } 5316 5317 /*@ 5318 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5319 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5320 5321 Not Collective 5322 5323 Input Parameters: 5324 + A - the matrix 5325 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5326 5327 Output Parameters: 5328 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5329 - A_loc - the local sequential matrix generated 5330 5331 Level: developer 5332 5333 Note: 5334 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5335 part, then those associated with the off-diagonal part (in its local ordering) 5336 5337 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5338 @*/ 5339 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5340 { 5341 Mat Ao, Ad; 5342 const PetscInt *cmap; 5343 PetscMPIInt size; 5344 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5345 5346 PetscFunctionBegin; 5347 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5348 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5349 if (size == 1) { 5350 if (scall == MAT_INITIAL_MATRIX) { 5351 PetscCall(PetscObjectReference((PetscObject)Ad)); 5352 *A_loc = Ad; 5353 } else if (scall == MAT_REUSE_MATRIX) { 5354 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5355 } 5356 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5357 PetscFunctionReturn(PETSC_SUCCESS); 5358 } 5359 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5360 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5361 if (f) { 5362 PetscCall((*f)(A, scall, glob, A_loc)); 5363 } else { 5364 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5365 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5366 Mat_SeqAIJ *c; 5367 PetscInt *ai = a->i, *aj = a->j; 5368 PetscInt *bi = b->i, *bj = b->j; 5369 PetscInt *ci, *cj; 5370 const PetscScalar *aa, *ba; 5371 PetscScalar *ca; 5372 PetscInt i, j, am, dn, on; 5373 5374 
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5375 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5376 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5377 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5378 if (scall == MAT_INITIAL_MATRIX) { 5379 PetscInt k; 5380 PetscCall(PetscMalloc1(1 + am, &ci)); 5381 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5382 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5383 ci[0] = 0; 5384 for (i = 0, k = 0; i < am; i++) { 5385 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5386 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5387 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5388 /* diagonal portion of A */ 5389 for (j = 0; j < ncols_d; j++, k++) { 5390 cj[k] = *aj++; 5391 ca[k] = *aa++; 5392 } 5393 /* off-diagonal portion of A */ 5394 for (j = 0; j < ncols_o; j++, k++) { 5395 cj[k] = dn + *bj++; 5396 ca[k] = *ba++; 5397 } 5398 } 5399 /* put together the new matrix */ 5400 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5401 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5402 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5403 c = (Mat_SeqAIJ *)(*A_loc)->data; 5404 c->free_a = PETSC_TRUE; 5405 c->free_ij = PETSC_TRUE; 5406 c->nonew = 0; 5407 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5408 } else if (scall == MAT_REUSE_MATRIX) { 5409 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5410 for (i = 0; i < am; i++) { 5411 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5412 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5413 /* diagonal portion of A */ 5414 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5415 /* off-diagonal portion of A */ 5416 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5417 } 5418 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5419 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5420 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5421 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5422 if (glob) { 5423 PetscInt cst, *gidx; 5424 5425 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5426 PetscCall(PetscMalloc1(dn + on, &gidx)); 5427 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5428 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5429 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5430 } 5431 } 5432 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5433 PetscFunctionReturn(PETSC_SUCCESS); 5434 } 5435 5436 /*@C 5437 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5438 5439 Not Collective 5440 5441 Input Parameters: 5442 + A - the matrix 5443 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5444 . row - index set of rows to extract (or `NULL`) 5445 - col - index set of columns to extract (or `NULL`) 5446 5447 Output Parameter: 5448 . 
A_loc - the local sequential matrix generated 5449 5450 Level: developer 5451 5452 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5453 @*/ 5454 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5455 { 5456 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5457 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5458 IS isrowa, iscola; 5459 Mat *aloc; 5460 PetscBool match; 5461 5462 PetscFunctionBegin; 5463 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5464 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5465 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5466 if (!row) { 5467 start = A->rmap->rstart; 5468 end = A->rmap->rend; 5469 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5470 } else { 5471 isrowa = *row; 5472 } 5473 if (!col) { 5474 start = A->cmap->rstart; 5475 cmap = a->garray; 5476 nzA = a->A->cmap->n; 5477 nzB = a->B->cmap->n; 5478 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5479 ncols = 0; 5480 for (i = 0; i < nzB; i++) { 5481 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5482 else break; 5483 } 5484 imark = i; 5485 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5486 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5487 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5488 } else { 5489 iscola = *col; 5490 } 5491 if (scall != MAT_INITIAL_MATRIX) { 5492 PetscCall(PetscMalloc1(1, &aloc)); 5493 aloc[0] = *A_loc; 5494 } 5495 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5496 if (!col) { /* attach global id of condensed columns */ 5497 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5498 } 5499 *A_loc = aloc[0]; 5500 PetscCall(PetscFree(aloc)); 5501 if (!row) PetscCall(ISDestroy(&isrowa)); 5502 if (!col) PetscCall(ISDestroy(&iscola)); 5503 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5504 PetscFunctionReturn(PETSC_SUCCESS); 5505 } 5506 5507 /* 5508 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5509 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5510 * on a global size. 
5511 * */ 5512 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5513 { 5514 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5515 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5516 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5517 PetscMPIInt owner; 5518 PetscSFNode *iremote, *oiremote; 5519 const PetscInt *lrowindices; 5520 PetscSF sf, osf; 5521 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5522 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5523 MPI_Comm comm; 5524 ISLocalToGlobalMapping mapping; 5525 const PetscScalar *pd_a, *po_a; 5526 5527 PetscFunctionBegin; 5528 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5529 /* plocalsize is the number of roots 5530 * nrows is the number of leaves 5531 * */ 5532 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5533 PetscCall(ISGetLocalSize(rows, &nrows)); 5534 PetscCall(PetscCalloc1(nrows, &iremote)); 5535 PetscCall(ISGetIndices(rows, &lrowindices)); 5536 for (i = 0; i < nrows; i++) { 5537 /* Find a remote index and an owner for a row 5538 * The row could be local or remote 5539 * */ 5540 owner = 0; 5541 lidx = 0; 5542 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5543 iremote[i].index = lidx; 5544 iremote[i].rank = owner; 5545 } 5546 /* Create SF to communicate how many nonzero columns for each row */ 5547 PetscCall(PetscSFCreate(comm, &sf)); 5548 /* SF will figure out the number of nonzero columns for each row, and their 5549 * offsets 5550 * */ 5551 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5552 PetscCall(PetscSFSetFromOptions(sf)); 5553 PetscCall(PetscSFSetUp(sf)); 5554 5555 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5556 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5557 PetscCall(PetscCalloc1(nrows, &pnnz)); 5558 roffsets[0] = 0; 5559 roffsets[1] = 0; 5560 for (i = 0; i < plocalsize; i++) { 5561 /* diagonal */ 5562 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5563 /* off-diagonal */ 5564 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5565 /* compute offsets so that we relative location for each row */ 5566 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5567 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5568 } 5569 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5570 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5571 /* 'r' means root, and 'l' means leaf */ 5572 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5573 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5574 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5575 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5576 PetscCall(PetscSFDestroy(&sf)); 5577 PetscCall(PetscFree(roffsets)); 5578 PetscCall(PetscFree(nrcols)); 5579 dntotalcols = 0; 5580 ontotalcols = 0; 5581 ncol = 0; 5582 for (i = 0; i < nrows; i++) { 5583 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5584 ncol = PetscMax(pnnz[i], ncol); 5585 /* diagonal */ 5586 dntotalcols += nlcols[i * 2 + 0]; 5587 /* off-diagonal */ 5588 ontotalcols += nlcols[i * 2 + 1]; 5589 } 5590 /* We do not need to figure the right number of columns 5591 * since all the calculations will be done by going through the raw data 5592 * */ 5593 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5594 PetscCall(MatSetUp(*P_oth)); 5595 
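  /* Next, build leaf/root index lists and two PetscSFs, one rooted at P's diagonal block and one at its off-diagonal block, that gather column indices and numerical values from P into the rows of P_oth */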
PetscCall(PetscFree(pnnz)); 5596 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5597 /* diagonal */ 5598 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5599 /* off-diagonal */ 5600 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5601 /* diagonal */ 5602 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5603 /* off-diagonal */ 5604 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5605 dntotalcols = 0; 5606 ontotalcols = 0; 5607 ntotalcols = 0; 5608 for (i = 0; i < nrows; i++) { 5609 owner = 0; 5610 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5611 /* Set iremote for diag matrix */ 5612 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5613 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5614 iremote[dntotalcols].rank = owner; 5615 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5616 ilocal[dntotalcols++] = ntotalcols++; 5617 } 5618 /* off-diagonal */ 5619 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5620 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5621 oiremote[ontotalcols].rank = owner; 5622 oilocal[ontotalcols++] = ntotalcols++; 5623 } 5624 } 5625 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5626 PetscCall(PetscFree(loffsets)); 5627 PetscCall(PetscFree(nlcols)); 5628 PetscCall(PetscSFCreate(comm, &sf)); 5629 /* P serves as roots and P_oth is leaves 5630 * Diag matrix 5631 * */ 5632 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5633 PetscCall(PetscSFSetFromOptions(sf)); 5634 PetscCall(PetscSFSetUp(sf)); 5635 5636 PetscCall(PetscSFCreate(comm, &osf)); 5637 /* off-diagonal */ 5638 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5639 PetscCall(PetscSFSetFromOptions(osf)); 5640 PetscCall(PetscSFSetUp(osf)); 5641 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5642 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5643 /* operate on the matrix internal data to save memory */ 5644 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5645 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5646 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5647 /* Convert to global indices for diag matrix */ 5648 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5649 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5650 /* We want P_oth store global indices */ 5651 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5652 /* Use memory scalable approach */ 5653 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5654 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5655 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5656 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5657 /* Convert back to local indices */ 5658 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5659 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5660 nout = 0; 5661 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5662 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5663 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5664 /* Exchange values */ 5665 PetscCall(PetscSFBcastEnd(sf, 
MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5666 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5667 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5668 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5669 /* Stop PETSc from shrinking memory */ 5670 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5671 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5672 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5673 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5674 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5675 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5676 PetscCall(PetscSFDestroy(&sf)); 5677 PetscCall(PetscSFDestroy(&osf)); 5678 PetscFunctionReturn(PETSC_SUCCESS); 5679 } 5680 5681 /* 5682 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5683 * This supports MPIAIJ and MAIJ 5684 * */ 5685 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5686 { 5687 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5688 Mat_SeqAIJ *p_oth; 5689 IS rows, map; 5690 PetscHMapI hamp; 5691 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5692 MPI_Comm comm; 5693 PetscSF sf, osf; 5694 PetscBool has; 5695 5696 PetscFunctionBegin; 5697 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5698 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5699 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5700 * and then create a submatrix (that often is an overlapping matrix) 5701 * */ 5702 if (reuse == MAT_INITIAL_MATRIX) { 5703 /* Use a hash table to figure out unique keys */ 5704 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5705 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5706 count = 0; 5707 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5708 for (i = 0; i < a->B->cmap->n; i++) { 5709 key = a->garray[i] / dof; 5710 PetscCall(PetscHMapIHas(hamp, key, &has)); 5711 if (!has) { 5712 mapping[i] = count; 5713 PetscCall(PetscHMapISet(hamp, key, count++)); 5714 } else { 5715 /* Current 'i' has the same value the previous step */ 5716 mapping[i] = count - 1; 5717 } 5718 } 5719 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5720 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5721 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5722 PetscCall(PetscCalloc1(htsize, &rowindices)); 5723 off = 0; 5724 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5725 PetscCall(PetscHMapIDestroy(&hamp)); 5726 PetscCall(PetscSortInt(htsize, rowindices)); 5727 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5728 /* In case, the matrix was already created but users want to recreate the matrix */ 5729 PetscCall(MatDestroy(P_oth)); 5730 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5731 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5732 PetscCall(ISDestroy(&map)); 5733 PetscCall(ISDestroy(&rows)); 5734 } else if (reuse == MAT_REUSE_MATRIX) { 5735 /* If matrix was already created, we simply update values using SF objects 5736 * that as attached to the matrix earlier. 
5737 */ 5738 const PetscScalar *pd_a, *po_a; 5739 5740 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5741 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5742 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5743 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5744 /* Update values in place */ 5745 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5746 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5747 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5748 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5749 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5750 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5751 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5752 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5753 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5754 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5755 PetscFunctionReturn(PETSC_SUCCESS); 5756 } 5757 5758 /*@C 5759 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5760 5761 Collective 5762 5763 Input Parameters: 5764 + A - the first matrix in `MATMPIAIJ` format 5765 . B - the second matrix in `MATMPIAIJ` format 5766 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5767 5768 Output Parameters: 5769 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5770 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5771 - B_seq - the sequential matrix generated 5772 5773 Level: developer 5774 5775 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5776 @*/ 5777 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5778 { 5779 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5780 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5781 IS isrowb, iscolb; 5782 Mat *bseq = NULL; 5783 5784 PetscFunctionBegin; 5785 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5786 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5787 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5788 5789 if (scall == MAT_INITIAL_MATRIX) { 5790 start = A->cmap->rstart; 5791 cmap = a->garray; 5792 nzA = a->A->cmap->n; 5793 nzB = a->B->cmap->n; 5794 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5795 ncols = 0; 5796 for (i = 0; i < nzB; i++) { /* row < local row index */ 5797 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5798 else break; 5799 } 5800 imark = i; 5801 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5802 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5803 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5804 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5805 } else { 5806 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5807 isrowb = *rowb; 5808 iscolb = *colb; 5809 PetscCall(PetscMalloc1(1, &bseq)); 5810 bseq[0] = *B_seq; 5811 } 5812 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5813 *B_seq = bseq[0]; 5814 PetscCall(PetscFree(bseq)); 
5815 if (!rowb) { 5816 PetscCall(ISDestroy(&isrowb)); 5817 } else { 5818 *rowb = isrowb; 5819 } 5820 if (!colb) { 5821 PetscCall(ISDestroy(&iscolb)); 5822 } else { 5823 *colb = iscolb; 5824 } 5825 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5826 PetscFunctionReturn(PETSC_SUCCESS); 5827 } 5828 5829 /* 5830 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix from the rows of B that correspond to the nonzero columns 5831 of the OFF-DIAGONAL portion of the local part of A 5832 5833 Collective 5834 5835 Input Parameters: 5836 + A,B - the matrices in `MATMPIAIJ` format 5837 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5838 5839 Output Parameters: 5840 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5841 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5842 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5843 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5844 5845 Developer Note: 5846 This directly accesses information inside the VecScatter associated with the matrix-vector product 5847 for this matrix. This is not desirable. 5848 5849 Level: developer 5850 5851 */ 5852 5853 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5854 { 5855 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5856 VecScatter ctx; 5857 MPI_Comm comm; 5858 const PetscMPIInt *rprocs, *sprocs; 5859 PetscMPIInt nrecvs, nsends; 5860 const PetscInt *srow, *rstarts, *sstarts; 5861 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5862 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5863 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5864 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5865 PetscMPIInt size, tag, rank, nreqs; 5866 5867 PetscFunctionBegin; 5868 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5869 PetscCallMPI(MPI_Comm_size(comm, &size)); 5870 5871 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5872 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5873 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5874 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5875 5876 if (size == 1) { 5877 startsj_s = NULL; 5878 bufa_ptr = NULL; 5879 *B_oth = NULL; 5880 PetscFunctionReturn(PETSC_SUCCESS); 5881 } 5882 5883 ctx = a->Mvctx; 5884 tag = ((PetscObject)ctx)->tag; 5885 5886 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5887 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5888 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5889 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5890 PetscCall(PetscMalloc1(nreqs, &reqs)); 5891 rwaits = reqs; 5892 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5893 5894 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5895 if (scall == MAT_INITIAL_MATRIX) { 5896 /* i-array */ 5897 /* post receives */ 5898 if (nrecvs) PetscCall(PetscMalloc1(rbs *
(rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5899 for (i = 0; i < nrecvs; i++) { 5900 rowlen = rvalues + rstarts[i] * rbs; 5901 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5902 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5903 } 5904 5905 /* pack the outgoing message */ 5906 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5907 5908 sstartsj[0] = 0; 5909 rstartsj[0] = 0; 5910 len = 0; /* total length of j or a array to be sent */ 5911 if (nsends) { 5912 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5913 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5914 } 5915 for (i = 0; i < nsends; i++) { 5916 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5917 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5918 for (j = 0; j < nrows; j++) { 5919 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5920 for (l = 0; l < sbs; l++) { 5921 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5922 5923 rowlen[j * sbs + l] = ncols; 5924 5925 len += ncols; 5926 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5927 } 5928 k++; 5929 } 5930 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5931 5932 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5933 } 5934 /* recvs and sends of i-array are completed */ 5935 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5936 PetscCall(PetscFree(svalues)); 5937 5938 /* allocate buffers for sending j and a arrays */ 5939 PetscCall(PetscMalloc1(len + 1, &bufj)); 5940 PetscCall(PetscMalloc1(len + 1, &bufa)); 5941 5942 /* create i-array of B_oth */ 5943 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5944 5945 b_othi[0] = 0; 5946 len = 0; /* total length of j or a array to be received */ 5947 k = 0; 5948 for (i = 0; i < nrecvs; i++) { 5949 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5950 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5951 for (j = 0; j < nrows; j++) { 5952 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5953 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5954 k++; 5955 } 5956 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5957 } 5958 PetscCall(PetscFree(rvalues)); 5959 5960 /* allocate space for j and a arrays of B_oth */ 5961 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5962 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5963 5964 /* j-array */ 5965 /* post receives of j-array */ 5966 for (i = 0; i < nrecvs; i++) { 5967 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5968 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5969 } 5970 5971 /* pack the outgoing message j-array */ 5972 if (nsends) k = sstarts[0]; 5973 for (i = 0; i < nsends; i++) { 5974 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5975 bufJ = bufj + sstartsj[i]; 5976 for (j = 0; j < nrows; j++) { 5977 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5978 for (ll = 0; ll < sbs; ll++) { 5979 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5980 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5981 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5982 } 5983 } 5984 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - 
sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5985 } 5986 5987 /* recvs and sends of j-array are completed */ 5988 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5989 } else if (scall == MAT_REUSE_MATRIX) { 5990 sstartsj = *startsj_s; 5991 rstartsj = *startsj_r; 5992 bufa = *bufa_ptr; 5993 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5994 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5995 5996 /* a-array */ 5997 /* post receives of a-array */ 5998 for (i = 0; i < nrecvs; i++) { 5999 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 6000 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 6001 } 6002 6003 /* pack the outgoing message a-array */ 6004 if (nsends) k = sstarts[0]; 6005 for (i = 0; i < nsends; i++) { 6006 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6007 bufA = bufa + sstartsj[i]; 6008 for (j = 0; j < nrows; j++) { 6009 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6010 for (ll = 0; ll < sbs; ll++) { 6011 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6012 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6013 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6014 } 6015 } 6016 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6017 } 6018 /* recvs and sends of a-array are completed */ 6019 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6020 PetscCall(PetscFree(reqs)); 6021 6022 if (scall == MAT_INITIAL_MATRIX) { 6023 Mat_SeqAIJ *b_oth; 6024 6025 /* put together the new matrix */ 6026 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6027 6028 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6029 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6030 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6031 b_oth->free_a = PETSC_TRUE; 6032 b_oth->free_ij = PETSC_TRUE; 6033 b_oth->nonew = 0; 6034 6035 PetscCall(PetscFree(bufj)); 6036 if (!startsj_s || !bufa_ptr) { 6037 PetscCall(PetscFree2(sstartsj, rstartsj)); 6038 PetscCall(PetscFree(bufa_ptr)); 6039 } else { 6040 *startsj_s = sstartsj; 6041 *startsj_r = rstartsj; 6042 *bufa_ptr = bufa; 6043 } 6044 } else if (scall == MAT_REUSE_MATRIX) { 6045 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6046 } 6047 6048 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6049 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6050 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6051 PetscFunctionReturn(PETSC_SUCCESS); 6052 } 6053 6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6057 #if defined(PETSC_HAVE_MKL_SPARSE) 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6059 #endif 6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6062 #if defined(PETSC_HAVE_ELEMENTAL) 6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6064 #endif 6065 #if defined(PETSC_HAVE_SCALAPACK) 6066 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6067 #endif 6068 #if defined(PETSC_HAVE_HYPRE) 6069 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6070 #endif 6071 #if defined(PETSC_HAVE_CUDA) 6072 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6073 #endif 6074 #if defined(PETSC_HAVE_HIP) 6075 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6076 #endif 6077 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6078 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6079 #endif 6080 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6081 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6082 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6083 6084 /* 6085 Computes (B'*A')' since computing B*A directly is untenable 6086 6087 n p p 6088 [ ] [ ] [ ] 6089 m [ A ] * n [ B ] = m [ C ] 6090 [ ] [ ] [ ] 6091 6092 */ 6093 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6094 { 6095 Mat At, Bt, Ct; 6096 6097 PetscFunctionBegin; 6098 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6099 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6100 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6101 PetscCall(MatDestroy(&At)); 6102 PetscCall(MatDestroy(&Bt)); 6103 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6104 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6105 PetscCall(MatDestroy(&Ct)); 6106 PetscFunctionReturn(PETSC_SUCCESS); 6107 } 6108 6109 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6110 { 6111 PetscBool cisdense; 6112 6113 PetscFunctionBegin; 6114 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6115 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6116 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6117 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6118 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6119 PetscCall(MatSetUp(C)); 6120 6121 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6122 PetscFunctionReturn(PETSC_SUCCESS); 6123 } 6124 6125 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6126 { 6127 Mat_Product *product = C->product; 6128 Mat A = product->A, B = product->B; 6129 6130 PetscFunctionBegin; 6131 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6132 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6133 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6134 C->ops->productsymbolic = MatProductSymbolic_AB; 6135 PetscFunctionReturn(PETSC_SUCCESS); 6136 } 6137 6138 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6139 { 6140 Mat_Product *product = C->product; 6141 6142 PetscFunctionBegin; 6143 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6144 PetscFunctionReturn(PETSC_SUCCESS); 6145 } 6146 6147 /* 6148 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6149 6150 Input Parameters: 6151 6152 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6153 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6154 6155 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6156 6157 For Set1, j1[] contains column indices of the nonzeros. 6158 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6159 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6160 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6161 6162 Similar for Set2. 6163 6164 This routine merges the two sets of nonzeros row by row and removes repeats. 6165 6166 Output Parameters: (memory is allocated by the caller) 6167 6168 i[],j[]: the CSR of the merged matrix, which has m rows. 6169 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6170 imap2[]: similar to imap1[], but for Set2. 6171 Note we order nonzeros row-by-row and from left to right. 
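   A small worked example (illustrative only) of the merge for a single row:
     j1 = {2,5,5}, rowBegin1 = {0}, rowEnd1 = {3}, jmap1 = {0,1,3}   (unique columns 2 and 5, with column 5 repeated)
     j2 = {3,3,5}, rowBegin2 = {0}, rowEnd2 = {3}, jmap2 = {0,2,3}   (unique columns 3 and 5, with column 3 repeated)
   produces i = {0,3}, j = {2,3,5}, imap1 = {0,2}, imap2 = {1,2},
   i.e., both sets contribute repeats to the merged nonzero at column 5.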
6172 */ 6173 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6174 { 6175 PetscInt r, m; /* Row index of mat */ 6176 PetscCount t, t1, t2, b1, e1, b2, e2; 6177 6178 PetscFunctionBegin; 6179 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6180 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6181 i[0] = 0; 6182 for (r = 0; r < m; r++) { /* Do row by row merging */ 6183 b1 = rowBegin1[r]; 6184 e1 = rowEnd1[r]; 6185 b2 = rowBegin2[r]; 6186 e2 = rowEnd2[r]; 6187 while (b1 < e1 && b2 < e2) { 6188 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6189 j[t] = j1[b1]; 6190 imap1[t1] = t; 6191 imap2[t2] = t; 6192 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6193 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6194 t1++; 6195 t2++; 6196 t++; 6197 } else if (j1[b1] < j2[b2]) { 6198 j[t] = j1[b1]; 6199 imap1[t1] = t; 6200 b1 += jmap1[t1 + 1] - jmap1[t1]; 6201 t1++; 6202 t++; 6203 } else { 6204 j[t] = j2[b2]; 6205 imap2[t2] = t; 6206 b2 += jmap2[t2 + 1] - jmap2[t2]; 6207 t2++; 6208 t++; 6209 } 6210 } 6211 /* Merge the remaining in either j1[] or j2[] */ 6212 while (b1 < e1) { 6213 j[t] = j1[b1]; 6214 imap1[t1] = t; 6215 b1 += jmap1[t1 + 1] - jmap1[t1]; 6216 t1++; 6217 t++; 6218 } 6219 while (b2 < e2) { 6220 j[t] = j2[b2]; 6221 imap2[t2] = t; 6222 b2 += jmap2[t2 + 1] - jmap2[t2]; 6223 t2++; 6224 t++; 6225 } 6226 PetscCall(PetscIntCast(t, i + r + 1)); 6227 } 6228 PetscFunctionReturn(PETSC_SUCCESS); 6229 } 6230 6231 /* 6232 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6233 6234 Input Parameters: 6235 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6236 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6237 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6238 6239 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6240 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6241 6242 Output Parameters: 6243 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6244 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6245 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6246 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6247 6248 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6249 Atot: number of entries belonging to the diagonal block. 6250 Annz: number of unique nonzeros belonging to the diagonal block. 6251 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6252 repeats (i.e., same 'i,j' pair). 6253 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6254 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6255 6256 Atot: number of entries belonging to the diagonal block 6257 Annz: number of unique nonzeros belonging to the diagonal block. 6258 6259 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6260 6261 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6262 */ 6263 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6264 { 6265 PetscInt cstart, cend, rstart, rend, row, col; 6266 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6267 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6268 PetscCount k, m, p, q, r, s, mid; 6269 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6270 6271 PetscFunctionBegin; 6272 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6273 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6274 m = rend - rstart; 6275 6276 /* Skip negative rows */ 6277 for (k = 0; k < n; k++) 6278 if (i[k] >= 0) break; 6279 6280 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6281 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6282 */ 6283 while (k < n) { 6284 row = i[k]; 6285 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6286 for (s = k; s < n; s++) 6287 if (i[s] != row) break; 6288 6289 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6290 for (p = k; p < s; p++) { 6291 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6292 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6293 } 6294 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6295 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6296 rowBegin[row - rstart] = k; 6297 rowMid[row - rstart] = mid; 6298 rowEnd[row - rstart] = s; 6299 6300 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6301 Atot += mid - k; 6302 Btot += s - mid; 6303 6304 /* Count unique nonzeros of this diag row */ 6305 for (p = k; p < mid;) { 6306 col = j[p]; 6307 do { 6308 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6309 p++; 6310 } while (p < mid && j[p] == col); 6311 Annz++; 6312 } 6313 6314 /* Count unique nonzeros of this offdiag row */ 6315 for (p = mid; p < s;) { 6316 col = j[p]; 6317 do { 6318 p++; 6319 } while (p < s && j[p] == col); 6320 Bnnz++; 6321 } 6322 k = s; 6323 } 6324 6325 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6326 PetscCall(PetscMalloc1(Atot, &Aperm)); 6327 PetscCall(PetscMalloc1(Btot, &Bperm)); 6328 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6329 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6330 6331 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6332 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6333 for (r = 0; r < m; r++) { 6334 k = rowBegin[r]; 6335 mid 
= rowMid[r]; 6336 s = rowEnd[r]; 6337 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6338 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6339 Atot += mid - k; 6340 Btot += s - mid; 6341 6342 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6343 for (p = k; p < mid;) { 6344 col = j[p]; 6345 q = p; 6346 do { 6347 p++; 6348 } while (p < mid && j[p] == col); 6349 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6350 Annz++; 6351 } 6352 6353 for (p = mid; p < s;) { 6354 col = j[p]; 6355 q = p; 6356 do { 6357 p++; 6358 } while (p < s && j[p] == col); 6359 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6360 Bnnz++; 6361 } 6362 } 6363 /* Output */ 6364 *Aperm_ = Aperm; 6365 *Annz_ = Annz; 6366 *Atot_ = Atot; 6367 *Ajmap_ = Ajmap; 6368 *Bperm_ = Bperm; 6369 *Bnnz_ = Bnnz; 6370 *Btot_ = Btot; 6371 *Bjmap_ = Bjmap; 6372 PetscFunctionReturn(PETSC_SUCCESS); 6373 } 6374 6375 /* 6376 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6377 6378 Input Parameters: 6379 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6380 nnz: number of unique nonzeros in the merged matrix 6381 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6382 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6383 6384 Output Parameter: (memory is allocated by the caller) 6385 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6386 6387 Example: 6388 nnz1 = 4 6389 nnz = 6 6390 imap = [1,3,4,5] 6391 jmap = [0,3,5,6,7] 6392 then, 6393 jmap_new = [0,0,3,3,5,6,7] 6394 */ 6395 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6396 { 6397 PetscCount k, p; 6398 6399 PetscFunctionBegin; 6400 jmap_new[0] = 0; 6401 p = nnz; /* p loops over jmap_new[] backwards */ 6402 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6403 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6404 } 6405 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6406 PetscFunctionReturn(PETSC_SUCCESS); 6407 } 6408 6409 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6410 { 6411 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6412 6413 PetscFunctionBegin; 6414 PetscCall(PetscSFDestroy(&coo->sf)); 6415 PetscCall(PetscFree(coo->Aperm1)); 6416 PetscCall(PetscFree(coo->Bperm1)); 6417 PetscCall(PetscFree(coo->Ajmap1)); 6418 PetscCall(PetscFree(coo->Bjmap1)); 6419 PetscCall(PetscFree(coo->Aimap2)); 6420 PetscCall(PetscFree(coo->Bimap2)); 6421 PetscCall(PetscFree(coo->Aperm2)); 6422 PetscCall(PetscFree(coo->Bperm2)); 6423 PetscCall(PetscFree(coo->Ajmap2)); 6424 PetscCall(PetscFree(coo->Bjmap2)); 6425 PetscCall(PetscFree(coo->Cperm1)); 6426 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6427 PetscCall(PetscFree(coo)); 6428 PetscFunctionReturn(PETSC_SUCCESS); 6429 } 6430 6431 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6432 { 6433 MPI_Comm comm; 6434 PetscMPIInt rank, size; 6435 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6436 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6437 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6438 PetscContainer container; 6439 MatCOOStruct_MPIAIJ 
*coo; 6440 6441 PetscFunctionBegin; 6442 PetscCall(PetscFree(mpiaij->garray)); 6443 PetscCall(VecDestroy(&mpiaij->lvec)); 6444 #if defined(PETSC_USE_CTABLE) 6445 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6446 #else 6447 PetscCall(PetscFree(mpiaij->colmap)); 6448 #endif 6449 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6450 mat->assembled = PETSC_FALSE; 6451 mat->was_assembled = PETSC_FALSE; 6452 6453 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6454 PetscCallMPI(MPI_Comm_size(comm, &size)); 6455 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6456 PetscCall(PetscLayoutSetUp(mat->rmap)); 6457 PetscCall(PetscLayoutSetUp(mat->cmap)); 6458 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6459 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6460 PetscCall(MatGetLocalSize(mat, &m, &n)); 6461 PetscCall(MatGetSize(mat, &M, &N)); 6462 6463 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6464 /* entries come first, then local rows, then remote rows. */ 6465 PetscCount n1 = coo_n, *perm1; 6466 PetscInt *i1 = coo_i, *j1 = coo_j; 6467 6468 PetscCall(PetscMalloc1(n1, &perm1)); 6469 for (k = 0; k < n1; k++) perm1[k] = k; 6470 6471 /* Manipulate indices so that entries with negative row or col indices will have smallest 6472 row indices, local entries will have greater but negative row indices, and remote entries 6473 will have positive row indices. 6474 */ 6475 for (k = 0; k < n1; k++) { 6476 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6477 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6478 else { 6479 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6480 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6481 } 6482 } 6483 6484 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6485 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6486 6487 /* Advance k to the first entry we need to take care of */ 6488 for (k = 0; k < n1; k++) 6489 if (i1[k] > PETSC_INT_MIN) break; 6490 PetscCount i1start = k; 6491 6492 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6493 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6494 6495 /* Send remote rows to their owner */ 6496 /* Find which rows should be sent to which remote ranks*/ 6497 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6498 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6499 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6500 const PetscInt *ranges; 6501 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */ 6502 6503 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6504 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6505 for (k = rem; k < n1;) { 6506 PetscMPIInt owner; 6507 PetscInt firstRow, lastRow; 6508 6509 /* Locate a row range */ 6510 firstRow = i1[k]; /* first row of this owner */ 6511 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6512 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6513 6514 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6515 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6516 6517 /* All entries in [k,p) belong to this remote owner */ 6518 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6519 PetscMPIInt *sendto2; 6520 PetscInt *nentries2; 6521 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6522 6523 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6524 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6525 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6526 PetscCall(PetscFree2(sendto, nentries)); 6527 sendto = sendto2; 6528 nentries = nentries2; 6529 maxNsend = maxNsend2; 6530 } 6531 sendto[nsend] = owner; 6532 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6533 nsend++; 6534 k = p; 6535 } 6536 6537 /* Build 1st SF to know offsets on remote to send data */ 6538 PetscSF sf1; 6539 PetscInt nroots = 1, nroots2 = 0; 6540 PetscInt nleaves = nsend, nleaves2 = 0; 6541 PetscInt *offsets; 6542 PetscSFNode *iremote; 6543 6544 PetscCall(PetscSFCreate(comm, &sf1)); 6545 PetscCall(PetscMalloc1(nsend, &iremote)); 6546 PetscCall(PetscMalloc1(nsend, &offsets)); 6547 for (k = 0; k < nsend; k++) { 6548 iremote[k].rank = sendto[k]; 6549 iremote[k].index = 0; 6550 nleaves2 += nentries[k]; 6551 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6552 } 6553 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6554 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6555 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */ 6556 PetscCall(PetscSFDestroy(&sf1)); 6557 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6558 6559 /* Build 2nd SF to send remote COOs to their owner */ 6560 PetscSF sf2; 6561 nroots = nroots2; 6562 nleaves = nleaves2; 6563 PetscCall(PetscSFCreate(comm, &sf2)); 6564 PetscCall(PetscSFSetFromOptions(sf2)); 6565 PetscCall(PetscMalloc1(nleaves, &iremote)); 6566 p = 0; 6567 for (k = 0; k < nsend; k++) { 6568 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6569 for (q = 0; q < nentries[k]; q++, p++) { 6570 iremote[p].rank = sendto[k]; 6571 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6572 } 6573 } 6574 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6575 6576 /* Send the remote COOs to their owner */ 6577 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6578
PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6579 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6580 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6581 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6582 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6583 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6584 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6585 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6586 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6587 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6588 6589 PetscCall(PetscFree(offsets)); 6590 PetscCall(PetscFree2(sendto, nentries)); 6591 6592 /* Sort received COOs by row along with the permutation array */ 6593 for (k = 0; k < n2; k++) perm2[k] = k; 6594 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6595 6596 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6597 PetscCount *Cperm1; 6598 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6599 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6600 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6601 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6602 6603 /* Support for HYPRE matrices, kind of a hack. 6604 Swap min column with diagonal so that diagonal values will go first */ 6605 PetscBool hypre; 6606 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6607 if (hypre) { 6608 PetscInt *minj; 6609 PetscBT hasdiag; 6610 6611 PetscCall(PetscBTCreate(m, &hasdiag)); 6612 PetscCall(PetscMalloc1(m, &minj)); 6613 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6614 for (k = i1start; k < rem; k++) { 6615 if (j1[k] < cstart || j1[k] >= cend) continue; 6616 const PetscInt rindex = i1[k] - rstart; 6617 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6618 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6619 } 6620 for (k = 0; k < n2; k++) { 6621 if (j2[k] < cstart || j2[k] >= cend) continue; 6622 const PetscInt rindex = i2[k] - rstart; 6623 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6624 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6625 } 6626 for (k = i1start; k < rem; k++) { 6627 const PetscInt rindex = i1[k] - rstart; 6628 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6629 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6630 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6631 } 6632 for (k = 0; k < n2; k++) { 6633 const PetscInt rindex = i2[k] - rstart; 6634 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6635 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6636 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6637 } 6638 PetscCall(PetscBTDestroy(&hasdiag)); 6639 PetscCall(PetscFree(minj)); 6640 } 6641 6642 /* Split local COOs and received COOs into diag/offdiag portions */ 6643 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6644 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6645 PetscCount 
Annz1, Bnnz1, Atot1, Btot1; 6646 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6647 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6648 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6649 6650 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6651 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6652 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6653 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6654 6655 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6656 PetscInt *Ai, *Bi; 6657 PetscInt *Aj, *Bj; 6658 6659 PetscCall(PetscMalloc1(m + 1, &Ai)); 6660 PetscCall(PetscMalloc1(m + 1, &Bi)); 6661 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6662 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6663 6664 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6665 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6666 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6667 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6668 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6669 6670 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6671 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6672 6673 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6674 /* expect nonzeros in A/B most likely have local contributing entries */ 6675 PetscInt Annz = Ai[m]; 6676 PetscInt Bnnz = Bi[m]; 6677 PetscCount *Ajmap1_new, *Bjmap1_new; 6678 6679 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6680 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6681 6682 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6683 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6684 6685 PetscCall(PetscFree(Aimap1)); 6686 PetscCall(PetscFree(Ajmap1)); 6687 PetscCall(PetscFree(Bimap1)); 6688 PetscCall(PetscFree(Bjmap1)); 6689 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6690 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6691 PetscCall(PetscFree(perm1)); 6692 PetscCall(PetscFree3(i2, j2, perm2)); 6693 6694 Ajmap1 = Ajmap1_new; 6695 Bjmap1 = Bjmap1_new; 6696 6697 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6698 if (Annz < Annz1 + Annz2) { 6699 PetscInt *Aj_new; 6700 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6701 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6702 PetscCall(PetscFree(Aj)); 6703 Aj = Aj_new; 6704 } 6705 6706 if (Bnnz < Bnnz1 + Bnnz2) { 6707 PetscInt *Bj_new; 6708 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6709 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6710 PetscCall(PetscFree(Bj)); 6711 Bj = Bj_new; 6712 } 6713 6714 /* Create new submatrices for on-process and off-process coupling */ 6715 PetscScalar *Aa, *Ba; 6716 MatType rtype; 6717 Mat_SeqAIJ *a, *b; 6718 PetscObjectState state; 6719 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6720 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6721 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6722 if (cstart) { 6723 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6724 } 6725 6726 PetscCall(MatGetRootType_Private(mat, &rtype)); 6727 6728 
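/* Recreate the sequential diagonal (A) and off-diagonal (B) blocks from the merged CSR arrays, carrying over any previously set Seq(X)AIJ options */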
MatSeqXAIJGetOptions_Private(mpiaij->A); 6729 PetscCall(MatDestroy(&mpiaij->A)); 6730 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6731 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6732 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6733 6734 MatSeqXAIJGetOptions_Private(mpiaij->B); 6735 PetscCall(MatDestroy(&mpiaij->B)); 6736 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6737 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6738 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6739 6740 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6741 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6742 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6743 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6744 6745 a = (Mat_SeqAIJ *)mpiaij->A->data; 6746 b = (Mat_SeqAIJ *)mpiaij->B->data; 6747 a->free_a = PETSC_TRUE; 6748 a->free_ij = PETSC_TRUE; 6749 b->free_a = PETSC_TRUE; 6750 b->free_ij = PETSC_TRUE; 6751 a->maxnz = a->nz; 6752 b->maxnz = b->nz; 6753 6754 /* conversion must happen AFTER multiply setup */ 6755 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6756 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6757 PetscCall(VecDestroy(&mpiaij->lvec)); 6758 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6759 6760 // Put the COO struct in a container and then attach that to the matrix 6761 PetscCall(PetscMalloc1(1, &coo)); 6762 coo->n = coo_n; 6763 coo->sf = sf2; 6764 coo->sendlen = nleaves; 6765 coo->recvlen = nroots; 6766 coo->Annz = Annz; 6767 coo->Bnnz = Bnnz; 6768 coo->Annz2 = Annz2; 6769 coo->Bnnz2 = Bnnz2; 6770 coo->Atot1 = Atot1; 6771 coo->Atot2 = Atot2; 6772 coo->Btot1 = Btot1; 6773 coo->Btot2 = Btot2; 6774 coo->Ajmap1 = Ajmap1; 6775 coo->Aperm1 = Aperm1; 6776 coo->Bjmap1 = Bjmap1; 6777 coo->Bperm1 = Bperm1; 6778 coo->Aimap2 = Aimap2; 6779 coo->Ajmap2 = Ajmap2; 6780 coo->Aperm2 = Aperm2; 6781 coo->Bimap2 = Bimap2; 6782 coo->Bjmap2 = Bjmap2; 6783 coo->Bperm2 = Bperm2; 6784 coo->Cperm1 = Cperm1; 6785 // Allocate in preallocation. 
If not used, it has zero cost on host 6786 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6787 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6788 PetscCall(PetscContainerSetPointer(container, coo)); 6789 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6790 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6791 PetscCall(PetscContainerDestroy(&container)); 6792 PetscFunctionReturn(PETSC_SUCCESS); 6793 } 6794 6795 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6796 { 6797 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6798 Mat A = mpiaij->A, B = mpiaij->B; 6799 PetscScalar *Aa, *Ba; 6800 PetscScalar *sendbuf, *recvbuf; 6801 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6802 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6803 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6804 const PetscCount *Cperm1; 6805 PetscContainer container; 6806 MatCOOStruct_MPIAIJ *coo; 6807 6808 PetscFunctionBegin; 6809 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6810 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6811 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6812 sendbuf = coo->sendbuf; 6813 recvbuf = coo->recvbuf; 6814 Ajmap1 = coo->Ajmap1; 6815 Ajmap2 = coo->Ajmap2; 6816 Aimap2 = coo->Aimap2; 6817 Bjmap1 = coo->Bjmap1; 6818 Bjmap2 = coo->Bjmap2; 6819 Bimap2 = coo->Bimap2; 6820 Aperm1 = coo->Aperm1; 6821 Aperm2 = coo->Aperm2; 6822 Bperm1 = coo->Bperm1; 6823 Bperm2 = coo->Bperm2; 6824 Cperm1 = coo->Cperm1; 6825 6826 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6827 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6828 6829 /* Pack entries to be sent to remote */ 6830 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6831 6832 /* Send remote entries to their owner and overlap the communication with local computation */ 6833 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6834 /* Add local entries to A and B */ 6835 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6836 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6837 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6838 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6839 } 6840 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6841 PetscScalar sum = 0.0; 6842 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6843 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6844 } 6845 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6846 6847 /* Add received remote entries to A and B */ 6848 for (PetscCount i = 0; i < coo->Annz2; i++) { 6849 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6850 } 6851 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6852 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6853 } 6854 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6855 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6856 PetscFunctionReturn(PETSC_SUCCESS); 6857 } 6858 6859 /*MC 6860 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6861 6862 Options Database Keys: 6863 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6864 6865 Level: beginner 6866 6867 Notes: 6868 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6869 in this case the values associated with the rows and columns one passes in are set to zero 6870 in the matrix 6871 6872 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6873 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6874 6875 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6876 M*/ 6877 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6878 { 6879 Mat_MPIAIJ *b; 6880 PetscMPIInt size; 6881 6882 PetscFunctionBegin; 6883 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6884 6885 PetscCall(PetscNew(&b)); 6886 B->data = (void *)b; 6887 B->ops[0] = MatOps_Values; 6888 B->assembled = PETSC_FALSE; 6889 B->insertmode = NOT_SET_VALUES; 6890 b->size = size; 6891 6892 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6893 6894 /* build cache for off array entries formed */ 6895 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6896 6897 b->donotstash = PETSC_FALSE; 6898 b->colmap = NULL; 6899 b->garray = NULL; 6900 b->roworiented = PETSC_TRUE; 6901 6902 /* stuff used for matrix vector multiply */ 6903 b->lvec = NULL; 6904 b->Mvctx = NULL; 6905 6906 /* stuff for MatGetRow() */ 6907 b->rowindices = NULL; 6908 b->rowvalues = NULL; 6909 b->getrowactive = PETSC_FALSE; 6910 6911 /* flexible pointer used in CUSPARSE classes */ 6912 b->spptr = NULL; 6913 6914 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6923 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6925 #if defined(PETSC_HAVE_CUDA) 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6927 #endif 6928 #if defined(PETSC_HAVE_HIP) 6929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6930 #endif 6931 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6933 #endif 6934 #if defined(PETSC_HAVE_MKL_SPARSE) 6935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6936 #endif 6937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6941 #if defined(PETSC_HAVE_ELEMENTAL) 6942 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6943 #endif 6944 #if defined(PETSC_HAVE_SCALAPACK) 6945 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6946 #endif 6947 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6948 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6949 #if defined(PETSC_HAVE_HYPRE) 6950 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6951 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6952 #endif 6953 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6954 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6955 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6956 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6957 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6958 PetscFunctionReturn(PETSC_SUCCESS); 6959 } 6960 6961 /*@ 6962 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6963 and "off-diagonal" part of the matrix in CSR format. 6964 6965 Collective 6966 6967 Input Parameters: 6968 + comm - MPI communicator 6969 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6970 . n - This value should be the same as the local size used in creating the 6971 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6972 calculated if `N` is given) For square matrices `n` is almost always `m`. 6973 . 
M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6974 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6975 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6976 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6977 . a - matrix values 6978 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6979 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6980 - oa - matrix values 6981 6982 Output Parameter: 6983 . mat - the matrix 6984 6985 Level: advanced 6986 6987 Notes: 6988 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6989 must free the arrays once the matrix has been destroyed and not before. 6990 6991 The `i` and `j` indices are 0 based 6992 6993 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6994 6995 This sets local rows and cannot be used to set off-processor values. 6996 6997 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6998 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6999 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 7000 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 7001 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 7002 communication if it is known that only local entries will be set. 
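   Example:
   An illustrative 4 x 4 matrix distributed over two MPI ranks, with 2 rows and 2 columns owned per rank; the arrays below are what each rank would pass to this routine
.vb
          1 2 0 0
          0 3 4 0
          0 5 6 0
          0 0 7 8

   rank 0 (m = n = 2):  i  = {0,2,3}  j  = {0,1,1}  a  = {1,2,3}   diagonal block, local column indices
                        oi = {0,0,1}  oj = {2}      oa = {4}       off-diagonal block, global column indices
   rank 1 (m = n = 2):  i  = {0,1,3}  j  = {0,0,1}  a  = {6,7,8}
                        oi = {0,1,1}  oj = {1}      oa = {5}
.ve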
7003 7004 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 7005 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 7006 @*/ 7007 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 7008 { 7009 Mat_MPIAIJ *maij; 7010 7011 PetscFunctionBegin; 7012 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 7013 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 7014 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 7015 PetscCall(MatCreate(comm, mat)); 7016 PetscCall(MatSetSizes(*mat, m, n, M, N)); 7017 PetscCall(MatSetType(*mat, MATMPIAIJ)); 7018 maij = (Mat_MPIAIJ *)(*mat)->data; 7019 7020 (*mat)->preallocated = PETSC_TRUE; 7021 7022 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 7023 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 7024 7025 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 7026 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7027 7028 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7029 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7030 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7031 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7032 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7033 PetscFunctionReturn(PETSC_SUCCESS); 7034 } 7035 7036 typedef struct { 7037 Mat *mp; /* intermediate products */ 7038 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7039 PetscInt cp; /* number of intermediate products */ 7040 7041 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7042 PetscInt *startsj_s, *startsj_r; 7043 PetscScalar *bufa; 7044 Mat P_oth; 7045 7046 /* may take advantage of merging product->B */ 7047 Mat Bloc; /* B-local by merging diag and off-diag */ 7048 7049 /* cusparse does not have support to split between symbolic and numeric phases. 7050 When api_user is true, we don't need to update the numerical values 7051 of the temporary storage */ 7052 PetscBool reusesym; 7053 7054 /* support for COO values insertion */ 7055 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7056 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7057 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7058 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7059 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7060 PetscMemType mtype; 7061 7062 /* customization */ 7063 PetscBool abmerge; 7064 PetscBool P_oth_bind; 7065 } MatMatMPIAIJBACKEND; 7066 7067 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7068 { 7069 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7070 PetscInt i; 7071 7072 PetscFunctionBegin; 7073 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7074 PetscCall(PetscFree(mmdata->bufa)); 7075 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7076 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7077 PetscCall(MatDestroy(&mmdata->P_oth)); 7078 PetscCall(MatDestroy(&mmdata->Bloc)); 7079 PetscCall(PetscSFDestroy(&mmdata->sf)); 7080 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7081 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7082 PetscCall(PetscFree(mmdata->own[0])); 7083 PetscCall(PetscFree(mmdata->own)); 7084 PetscCall(PetscFree(mmdata->off[0])); 7085 PetscCall(PetscFree(mmdata->off)); 7086 PetscCall(PetscFree(mmdata)); 7087 PetscFunctionReturn(PETSC_SUCCESS); 7088 } 7089 7090 /* Copy selected n entries with indices in idx[] of A to v[]. 7091 If idx is NULL, copy the whole data array of A to v[] 7092 */ 7093 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7094 { 7095 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7096 7097 PetscFunctionBegin; 7098 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7099 if (f) { 7100 PetscCall((*f)(A, n, idx, v)); 7101 } else { 7102 const PetscScalar *vv; 7103 7104 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7105 if (n && idx) { 7106 PetscScalar *w = v; 7107 const PetscInt *oi = idx; 7108 PetscInt j; 7109 7110 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7111 } else { 7112 PetscCall(PetscArraycpy(v, vv, n)); 7113 } 7114 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7115 } 7116 PetscFunctionReturn(PETSC_SUCCESS); 7117 } 7118 7119 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7120 { 7121 MatMatMPIAIJBACKEND *mmdata; 7122 PetscInt i, n_d, n_o; 7123 7124 PetscFunctionBegin; 7125 MatCheckProduct(C, 1); 7126 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7127 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7128 if (!mmdata->reusesym) { /* update temporary matrices */ 7129 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7130 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7131 } 7132 mmdata->reusesym = PETSC_FALSE; 7133 7134 for (i = 0; i < mmdata->cp; i++) { 7135 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7136 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7137 } 7138 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7139 PetscInt noff; 7140 7141 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7142 if (mmdata->mptmp[i]) continue; 7143 if (noff) { 7144 PetscInt nown; 7145 7146 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7147 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], 
mmdata->coo_w + n_o)); 7148 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7149 n_o += noff; 7150 n_d += nown; 7151 } else { 7152 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7153 7154 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7155 n_d += mm->nz; 7156 } 7157 } 7158 if (mmdata->hasoffproc) { /* offprocess insertion */ 7159 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7160 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7161 } 7162 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7163 PetscFunctionReturn(PETSC_SUCCESS); 7164 } 7165 7166 /* Support for Pt * A, A * P, or Pt * A * P */ 7167 #define MAX_NUMBER_INTERMEDIATE 4 7168 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7169 { 7170 Mat_Product *product = C->product; 7171 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7172 Mat_MPIAIJ *a, *p; 7173 MatMatMPIAIJBACKEND *mmdata; 7174 ISLocalToGlobalMapping P_oth_l2g = NULL; 7175 IS glob = NULL; 7176 const char *prefix; 7177 char pprefix[256]; 7178 const PetscInt *globidx, *P_oth_idx; 7179 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7180 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7181 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7182 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7183 /* a base offset; type-2: sparse with a local to global map table */ 7184 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7185 7186 MatProductType ptype; 7187 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7188 PetscMPIInt size; 7189 7190 PetscFunctionBegin; 7191 MatCheckProduct(C, 1); 7192 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7193 ptype = product->type; 7194 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7195 ptype = MATPRODUCT_AB; 7196 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7197 } 7198 switch (ptype) { 7199 case MATPRODUCT_AB: 7200 A = product->A; 7201 P = product->B; 7202 m = A->rmap->n; 7203 n = P->cmap->n; 7204 M = A->rmap->N; 7205 N = P->cmap->N; 7206 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7207 break; 7208 case MATPRODUCT_AtB: 7209 P = product->A; 7210 A = product->B; 7211 m = P->cmap->n; 7212 n = A->cmap->n; 7213 M = P->cmap->N; 7214 N = A->cmap->N; 7215 hasoffproc = PETSC_TRUE; 7216 break; 7217 case MATPRODUCT_PtAP: 7218 A = product->A; 7219 P = product->B; 7220 m = P->cmap->n; 7221 n = P->cmap->n; 7222 M = P->cmap->N; 7223 N = P->cmap->N; 7224 hasoffproc = PETSC_TRUE; 7225 break; 7226 default: 7227 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7228 } 7229 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7230 if (size == 1) hasoffproc = PETSC_FALSE; 7231 7232 /* defaults */ 7233 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7234 mp[i] = NULL; 7235 mptmp[i] = PETSC_FALSE; 7236 rmapt[i] = -1; 7237 cmapt[i] = -1; 7238 rmapa[i] = NULL; 7239 cmapa[i] = NULL; 7240 } 7241 7242 /* customization */ 7243 PetscCall(PetscNew(&mmdata)); 7244 mmdata->reusesym = product->api_user; 7245 if (ptype == 
MATPRODUCT_AB) { 7246 if (product->api_user) { 7247 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7248 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7249 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7250 PetscOptionsEnd(); 7251 } else { 7252 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7253 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7254 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7255 PetscOptionsEnd(); 7256 } 7257 } else if (ptype == MATPRODUCT_PtAP) { 7258 if (product->api_user) { 7259 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7260 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7261 PetscOptionsEnd(); 7262 } else { 7263 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7264 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7265 PetscOptionsEnd(); 7266 } 7267 } 7268 a = (Mat_MPIAIJ *)A->data; 7269 p = (Mat_MPIAIJ *)P->data; 7270 PetscCall(MatSetSizes(C, m, n, M, N)); 7271 PetscCall(PetscLayoutSetUp(C->rmap)); 7272 PetscCall(PetscLayoutSetUp(C->cmap)); 7273 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7274 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7275 7276 cp = 0; 7277 switch (ptype) { 7278 case MATPRODUCT_AB: /* A * P */ 7279 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7280 7281 /* A_diag * P_local (merged or not) */ 7282 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7283 /* P is product->B */ 7284 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7285 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7286 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7287 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7288 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7289 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7290 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7291 mp[cp]->product->api_user = product->api_user; 7292 PetscCall(MatProductSetFromOptions(mp[cp])); 7293 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7294 PetscCall(ISGetIndices(glob, &globidx)); 7295 rmapt[cp] = 1; 7296 cmapt[cp] = 2; 7297 cmapa[cp] = globidx; 7298 mptmp[cp] = PETSC_FALSE; 7299 cp++; 7300 } else { /* A_diag * P_diag and A_diag * P_off */ 7301 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7302 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7303 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7304 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7305 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7306 PetscCall(MatAppendOptionsPrefix(mp[cp], 
pprefix)); 7307 mp[cp]->product->api_user = product->api_user; 7308 PetscCall(MatProductSetFromOptions(mp[cp])); 7309 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7310 rmapt[cp] = 1; 7311 cmapt[cp] = 1; 7312 mptmp[cp] = PETSC_FALSE; 7313 cp++; 7314 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7315 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7316 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7317 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7318 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7319 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7320 mp[cp]->product->api_user = product->api_user; 7321 PetscCall(MatProductSetFromOptions(mp[cp])); 7322 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7323 rmapt[cp] = 1; 7324 cmapt[cp] = 2; 7325 cmapa[cp] = p->garray; 7326 mptmp[cp] = PETSC_FALSE; 7327 cp++; 7328 } 7329 7330 /* A_off * P_other */ 7331 if (mmdata->P_oth) { 7332 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7333 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7334 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7335 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7336 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7337 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7338 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7339 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7340 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7341 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7342 mp[cp]->product->api_user = product->api_user; 7343 PetscCall(MatProductSetFromOptions(mp[cp])); 7344 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7345 rmapt[cp] = 1; 7346 cmapt[cp] = 2; 7347 cmapa[cp] = P_oth_idx; 7348 mptmp[cp] = PETSC_FALSE; 7349 cp++; 7350 } 7351 break; 7352 7353 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7354 /* A is product->B */ 7355 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7356 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7357 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7358 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7359 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7360 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7361 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7362 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7363 mp[cp]->product->api_user = product->api_user; 7364 PetscCall(MatProductSetFromOptions(mp[cp])); 7365 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7366 PetscCall(ISGetIndices(glob, &globidx)); 7367 rmapt[cp] = 2; 7368 rmapa[cp] = globidx; 7369 cmapt[cp] = 2; 7370 cmapa[cp] = globidx; 7371 mptmp[cp] = PETSC_FALSE; 7372 cp++; 7373 } else { 7374 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7375 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7376 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7377 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7378 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7379 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7380 mp[cp]->product->api_user = product->api_user; 7381 PetscCall(MatProductSetFromOptions(mp[cp])); 7382 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 
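      /* First intermediate product of the split P^t * A = P_diag^t * A_loc + P_off^t * A_loc: its rows are the
         locally owned rows of C (row map type 1, consecutive from C->rmap->rstart), while its columns are those of
         the merged local matrix Bloc and are translated to global indices with the table fetched from glob just
         below (column map type 2). */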
7383 PetscCall(ISGetIndices(glob, &globidx)); 7384 rmapt[cp] = 1; 7385 cmapt[cp] = 2; 7386 cmapa[cp] = globidx; 7387 mptmp[cp] = PETSC_FALSE; 7388 cp++; 7389 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7390 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7391 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7392 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7393 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7394 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7395 mp[cp]->product->api_user = product->api_user; 7396 PetscCall(MatProductSetFromOptions(mp[cp])); 7397 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7398 rmapt[cp] = 2; 7399 rmapa[cp] = p->garray; 7400 cmapt[cp] = 2; 7401 cmapa[cp] = globidx; 7402 mptmp[cp] = PETSC_FALSE; 7403 cp++; 7404 } 7405 break; 7406 case MATPRODUCT_PtAP: 7407 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7408 /* P is product->B */ 7409 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7410 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7411 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7412 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7413 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7414 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7415 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7416 mp[cp]->product->api_user = product->api_user; 7417 PetscCall(MatProductSetFromOptions(mp[cp])); 7418 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7419 PetscCall(ISGetIndices(glob, &globidx)); 7420 rmapt[cp] = 2; 7421 rmapa[cp] = globidx; 7422 cmapt[cp] = 2; 7423 cmapa[cp] = globidx; 7424 mptmp[cp] = PETSC_FALSE; 7425 cp++; 7426 if (mmdata->P_oth) { 7427 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7428 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7429 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7430 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7431 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7432 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7433 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7434 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7435 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7436 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7437 mp[cp]->product->api_user = product->api_user; 7438 PetscCall(MatProductSetFromOptions(mp[cp])); 7439 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7440 mptmp[cp] = PETSC_TRUE; 7441 cp++; 7442 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7443 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7444 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7445 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7446 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7447 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7448 mp[cp]->product->api_user = product->api_user; 7449 PetscCall(MatProductSetFromOptions(mp[cp])); 7450 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7451 rmapt[cp] = 2; 7452 rmapa[cp] = globidx; 7453 cmapt[cp] = 2; 7454 cmapa[cp] = P_oth_idx; 7455 mptmp[cp] = PETSC_FALSE; 7456 cp++; 7457 } 7458 break; 7459 default: 7460 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, 
"Not for product type %s", MatProductTypes[ptype]); 7461 } 7462 /* sanity check */ 7463 if (size > 1) 7464 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7465 7466 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7467 for (i = 0; i < cp; i++) { 7468 mmdata->mp[i] = mp[i]; 7469 mmdata->mptmp[i] = mptmp[i]; 7470 } 7471 mmdata->cp = cp; 7472 C->product->data = mmdata; 7473 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7474 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7475 7476 /* memory type */ 7477 mmdata->mtype = PETSC_MEMTYPE_HOST; 7478 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7479 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7480 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7481 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7482 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7483 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7484 7485 /* prepare coo coordinates for values insertion */ 7486 7487 /* count total nonzeros of those intermediate seqaij Mats 7488 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7489 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7490 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7491 */ 7492 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7493 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7494 if (mptmp[cp]) continue; 7495 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7496 const PetscInt *rmap = rmapa[cp]; 7497 const PetscInt mr = mp[cp]->rmap->n; 7498 const PetscInt rs = C->rmap->rstart; 7499 const PetscInt re = C->rmap->rend; 7500 const PetscInt *ii = mm->i; 7501 for (i = 0; i < mr; i++) { 7502 const PetscInt gr = rmap[i]; 7503 const PetscInt nz = ii[i + 1] - ii[i]; 7504 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7505 else ncoo_oown += nz; /* this row is local */ 7506 } 7507 } else ncoo_d += mm->nz; 7508 } 7509 7510 /* 7511 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7512 7513 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7514 7515 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7516 7517 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7518 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7519 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7520 7521 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7522 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 
7523 */ 7524 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7525 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7526 7527 /* gather (i,j) of nonzeros inserted by remote procs */ 7528 if (hasoffproc) { 7529 PetscSF msf; 7530 PetscInt ncoo2, *coo_i2, *coo_j2; 7531 7532 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7533 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7534 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7535 7536 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7537 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7538 PetscInt *idxoff = mmdata->off[cp]; 7539 PetscInt *idxown = mmdata->own[cp]; 7540 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7541 const PetscInt *rmap = rmapa[cp]; 7542 const PetscInt *cmap = cmapa[cp]; 7543 const PetscInt *ii = mm->i; 7544 PetscInt *coi = coo_i + ncoo_o; 7545 PetscInt *coj = coo_j + ncoo_o; 7546 const PetscInt mr = mp[cp]->rmap->n; 7547 const PetscInt rs = C->rmap->rstart; 7548 const PetscInt re = C->rmap->rend; 7549 const PetscInt cs = C->cmap->rstart; 7550 for (i = 0; i < mr; i++) { 7551 const PetscInt *jj = mm->j + ii[i]; 7552 const PetscInt gr = rmap[i]; 7553 const PetscInt nz = ii[i + 1] - ii[i]; 7554 if (gr < rs || gr >= re) { /* this is an offproc row */ 7555 for (j = ii[i]; j < ii[i + 1]; j++) { 7556 *coi++ = gr; 7557 *idxoff++ = j; 7558 } 7559 if (!cmapt[cp]) { /* already global */ 7560 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7561 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7562 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7563 } else { /* offdiag */ 7564 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7565 } 7566 ncoo_o += nz; 7567 } else { /* this is a local row */ 7568 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7569 } 7570 } 7571 } 7572 mmdata->off[cp + 1] = idxoff; 7573 mmdata->own[cp + 1] = idxown; 7574 } 7575 7576 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7577 PetscInt incoo_o; 7578 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7579 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7580 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7581 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7582 ncoo = ncoo_d + ncoo_oown + ncoo2; 7583 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7584 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7585 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7586 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7587 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7588 PetscCall(PetscFree2(coo_i, coo_j)); 7589 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7590 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7591 coo_i = coo_i2; 7592 coo_j = coo_j2; 7593 } else { /* no offproc values insertion */ 7594 ncoo = ncoo_d; 7595 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7596 7597 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7598 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7599 PetscCall(PetscSFSetUp(mmdata->sf)); 7600 } 7601 
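  /* How the star forest and the coo_w/coo_v buffers set up for the hasoffproc case are consumed at numeric time
     (see MatProductNumeric_MPIAIJBACKEND() above): the locally computed values fill the first ncoo_d + ncoo_oown
     entries of coo_v, the values destined for other processes are packed into coo_w, and then

       PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + ncoo_d + ncoo_oown);
       PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + ncoo_d + ncoo_oown);
       MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES);

     appends the received values behind the local ones and inserts everything through the COO interface. */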
mmdata->hasoffproc = hasoffproc; 7602 7603 /* gather (i,j) of nonzeros inserted locally */ 7604 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7605 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7606 PetscInt *coi = coo_i + ncoo_d; 7607 PetscInt *coj = coo_j + ncoo_d; 7608 const PetscInt *jj = mm->j; 7609 const PetscInt *ii = mm->i; 7610 const PetscInt *cmap = cmapa[cp]; 7611 const PetscInt *rmap = rmapa[cp]; 7612 const PetscInt mr = mp[cp]->rmap->n; 7613 const PetscInt rs = C->rmap->rstart; 7614 const PetscInt re = C->rmap->rend; 7615 const PetscInt cs = C->cmap->rstart; 7616 7617 if (mptmp[cp]) continue; 7618 if (rmapt[cp] == 1) { /* consecutive rows */ 7619 /* fill coo_i */ 7620 for (i = 0; i < mr; i++) { 7621 const PetscInt gr = i + rs; 7622 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7623 } 7624 /* fill coo_j */ 7625 if (!cmapt[cp]) { /* type-0, already global */ 7626 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7627 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7628 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7629 } else { /* type-2, local to global for sparse columns */ 7630 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7631 } 7632 ncoo_d += mm->nz; 7633 } else if (rmapt[cp] == 2) { /* sparse rows */ 7634 for (i = 0; i < mr; i++) { 7635 const PetscInt *jj = mm->j + ii[i]; 7636 const PetscInt gr = rmap[i]; 7637 const PetscInt nz = ii[i + 1] - ii[i]; 7638 if (gr >= rs && gr < re) { /* local rows */ 7639 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7640 if (!cmapt[cp]) { /* type-0, already global */ 7641 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7642 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7643 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7644 } else { /* type-2, local to global for sparse columns */ 7645 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7646 } 7647 ncoo_d += nz; 7648 } 7649 } 7650 } 7651 } 7652 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7653 PetscCall(ISDestroy(&glob)); 7654 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7655 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7656 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7657 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7658 7659 /* preallocate with COO data */ 7660 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7661 PetscCall(PetscFree2(coo_i, coo_j)); 7662 PetscFunctionReturn(PETSC_SUCCESS); 7663 } 7664 7665 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7666 { 7667 Mat_Product *product = mat->product; 7668 #if defined(PETSC_HAVE_DEVICE) 7669 PetscBool match = PETSC_FALSE; 7670 PetscBool usecpu = PETSC_FALSE; 7671 #else 7672 PetscBool match = PETSC_TRUE; 7673 #endif 7674 7675 PetscFunctionBegin; 7676 MatCheckProduct(mat, 1); 7677 #if defined(PETSC_HAVE_DEVICE) 7678 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7679 if (match) { /* we can always fallback to the CPU if requested */ 7680 switch (product->type) { 7681 case MATPRODUCT_AB: 7682 if (product->api_user) { 7683 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7684 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7685 
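        /* When -matmatmult_backend_cpu (or the -mat_product_algorithm_backend_cpu spelling below) is set, match is
           cleared further down and the product falls back to the host implementation via MatProductSetFromOptions_MPIAIJ(). */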
PetscOptionsEnd(); 7686 } else { 7687 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7688 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7689 PetscOptionsEnd(); 7690 } 7691 break; 7692 case MATPRODUCT_AtB: 7693 if (product->api_user) { 7694 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7695 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7696 PetscOptionsEnd(); 7697 } else { 7698 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7699 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7700 PetscOptionsEnd(); 7701 } 7702 break; 7703 case MATPRODUCT_PtAP: 7704 if (product->api_user) { 7705 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7706 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7707 PetscOptionsEnd(); 7708 } else { 7709 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7710 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7711 PetscOptionsEnd(); 7712 } 7713 break; 7714 default: 7715 break; 7716 } 7717 match = (PetscBool)!usecpu; 7718 } 7719 #endif 7720 if (match) { 7721 switch (product->type) { 7722 case MATPRODUCT_AB: 7723 case MATPRODUCT_AtB: 7724 case MATPRODUCT_PtAP: 7725 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7726 break; 7727 default: 7728 break; 7729 } 7730 } 7731 /* fallback to MPIAIJ ops */ 7732 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7733 PetscFunctionReturn(PETSC_SUCCESS); 7734 } 7735 7736 /* 7737 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7738 7739 n - the number of block indices in cc[] 7740 cc - the block indices (must be large enough to contain the indices) 7741 */ 7742 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7743 { 7744 PetscInt cnt = -1, nidx, j; 7745 const PetscInt *idx; 7746 7747 PetscFunctionBegin; 7748 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7749 if (nidx) { 7750 cnt = 0; 7751 cc[cnt] = idx[0] / bs; 7752 for (j = 1; j < nidx; j++) { 7753 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7754 } 7755 } 7756 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7757 *n = cnt + 1; 7758 PetscFunctionReturn(PETSC_SUCCESS); 7759 } 7760 7761 /* 7762 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7763 7764 ncollapsed - the number of block indices 7765 collapsed - the block indices (must be large enough to contain the indices) 7766 */ 7767 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7768 { 7769 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7770 7771 PetscFunctionBegin; 7772 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7773 for (i = start + 1; i < start + bs; i++) { 7774 
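    /* Collapse row i to its block-column indices and merge them into the running set; after the pointer swap below,
       cprev always points at the sorted union of the block rows processed so far. */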
PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7775 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7776 cprevtmp = cprev; 7777 cprev = merged; 7778 merged = cprevtmp; 7779 } 7780 *ncollapsed = nprev; 7781 if (collapsed) *collapsed = cprev; 7782 PetscFunctionReturn(PETSC_SUCCESS); 7783 } 7784 7785 /* 7786 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7787 7788 Input Parameter: 7789 . Amat - matrix 7790 - symmetrize - make the result symmetric 7791 + scale - scale with diagonal 7792 7793 Output Parameter: 7794 . a_Gmat - output scalar graph >= 0 7795 7796 */ 7797 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7798 { 7799 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7800 MPI_Comm comm; 7801 Mat Gmat; 7802 PetscBool ismpiaij, isseqaij; 7803 Mat a, b, c; 7804 MatType jtype; 7805 7806 PetscFunctionBegin; 7807 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7808 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7809 PetscCall(MatGetSize(Amat, &MM, &NN)); 7810 PetscCall(MatGetBlockSize(Amat, &bs)); 7811 nloc = (Iend - Istart) / bs; 7812 7813 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7814 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7815 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7816 7817 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7818 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7819 implementation */ 7820 if (bs > 1) { 7821 PetscCall(MatGetType(Amat, &jtype)); 7822 PetscCall(MatCreate(comm, &Gmat)); 7823 PetscCall(MatSetType(Gmat, jtype)); 7824 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7825 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7826 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7827 PetscInt *d_nnz, *o_nnz; 7828 MatScalar *aa, val, *AA; 7829 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7830 7831 if (isseqaij) { 7832 a = Amat; 7833 b = NULL; 7834 } else { 7835 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7836 a = d->A; 7837 b = d->B; 7838 } 7839 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7840 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7841 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7842 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7843 const PetscInt *cols1, *cols2; 7844 7845 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7846 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7847 nnz[brow / bs] = nc2 / bs; 7848 if (nc2 % bs) ok = 0; 7849 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7850 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7851 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7852 if (nc1 != nc2) ok = 0; 7853 else { 7854 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7855 if (cols1[jj] != cols2[jj]) ok = 0; 7856 if (cols1[jj] % bs != jj % bs) ok = 0; 7857 } 7858 } 7859 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7860 } 7861 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7862 if (!ok) { 7863 PetscCall(PetscFree2(d_nnz, o_nnz)); 7864 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7865 goto old_bs; 7866 } 7867 } 7868 } 7869 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7870 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7871 PetscCall(PetscFree2(d_nnz, o_nnz)); 7872 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7873 // diag 7874 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7875 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7876 7877 ai = aseq->i; 7878 n = ai[brow + 1] - ai[brow]; 7879 aj = aseq->j + ai[brow]; 7880 for (PetscInt k = 0; k < n; k += bs) { // block columns 7881 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7882 val = 0; 7883 if (index_size == 0) { 7884 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7885 aa = aseq->a + ai[brow + ii] + k; 7886 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7887 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7888 } 7889 } 7890 } else { // use (index,index) value if provided 7891 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7892 PetscInt ii = index[iii]; 7893 aa = aseq->a + ai[brow + ii] + k; 7894 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7895 PetscInt jj = index[jjj]; 7896 val += PetscAbs(PetscRealPart(aa[jj])); 7897 } 7898 } 7899 } 7900 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7901 AA[k / bs] = val; 7902 } 7903 grow = Istart / bs + brow / bs; 7904 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7905 } 7906 // off-diag 7907 if (ismpiaij) { 7908 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7909 const PetscScalar *vals; 7910 const PetscInt *cols, *garray = aij->garray; 7911 7912 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7913 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7914 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7915 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7916 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7917 AA[k / bs] = 0; 7918 AJ[cidx] = garray[cols[k]] / bs; 7919 } 7920 nc = ncols / bs; 7921 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7922 if (index_size == 0) { 7923 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7924 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7925 for (PetscInt k = 0; k < ncols; k += bs) { 7926 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7927 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7928 AA[k / bs] 
+= PetscAbs(PetscRealPart(vals[k + jj])); 7929 } 7930 } 7931 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7932 } 7933 } else { // use (index,index) value if provided 7934 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7935 PetscInt ii = index[iii]; 7936 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7937 for (PetscInt k = 0; k < ncols; k += bs) { 7938 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7939 PetscInt jj = index[jjj]; 7940 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7941 } 7942 } 7943 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7944 } 7945 } 7946 grow = Istart / bs + brow / bs; 7947 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7948 } 7949 } 7950 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7951 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7952 PetscCall(PetscFree2(AA, AJ)); 7953 } else { 7954 const PetscScalar *vals; 7955 const PetscInt *idx; 7956 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7957 old_bs: 7958 /* 7959 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7960 */ 7961 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7962 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7963 if (isseqaij) { 7964 PetscInt max_d_nnz; 7965 7966 /* 7967 Determine exact preallocation count for (sequential) scalar matrix 7968 */ 7969 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7970 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7971 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7972 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7973 PetscCall(PetscFree3(w0, w1, w2)); 7974 } else if (ismpiaij) { 7975 Mat Daij, Oaij; 7976 const PetscInt *garray; 7977 PetscInt max_d_nnz; 7978 7979 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7980 /* 7981 Determine exact preallocation count for diagonal block portion of scalar matrix 7982 */ 7983 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7984 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7985 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7986 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7987 PetscCall(PetscFree3(w0, w1, w2)); 7988 /* 7989 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7990 */ 7991 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7992 o_nnz[jj] = 0; 7993 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7994 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7995 o_nnz[jj] += ncols; 7996 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7997 } 7998 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7999 } 8000 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 8001 /* get scalar copy (norms) of matrix */ 8002 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 8003 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 8004 PetscCall(PetscFree2(d_nnz, o_nnz)); 8005 for (Ii = Istart; Ii < Iend; Ii++) { 8006 PetscInt dest_row = Ii / bs; 8007 8008 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 8009 for (jj = 0; jj < ncols; jj++) { 8010 PetscInt dest_col = idx[jj] / bs; 8011 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 8012 8013 PetscCall(MatSetValues(Gmat, 1, 
&dest_row, 1, &dest_col, &sv, ADD_VALUES)); 8014 } 8015 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 8016 } 8017 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8018 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8019 } 8020 } else { 8021 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8022 else { 8023 Gmat = Amat; 8024 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8025 } 8026 if (isseqaij) { 8027 a = Gmat; 8028 b = NULL; 8029 } else { 8030 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8031 a = d->A; 8032 b = d->B; 8033 } 8034 if (filter >= 0 || scale) { 8035 /* take absolute value of each entry */ 8036 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8037 MatInfo info; 8038 PetscScalar *avals; 8039 8040 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8041 PetscCall(MatSeqAIJGetArray(c, &avals)); 8042 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8043 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8044 } 8045 } 8046 } 8047 if (symmetrize) { 8048 PetscBool isset, issym; 8049 8050 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8051 if (!isset || !issym) { 8052 Mat matTrans; 8053 8054 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8055 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8056 PetscCall(MatDestroy(&matTrans)); 8057 } 8058 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8059 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8060 if (scale) { 8061 /* scale c for all diagonal values = 1 or -1 */ 8062 Vec diag; 8063 8064 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8065 PetscCall(MatGetDiagonal(Gmat, diag)); 8066 PetscCall(VecReciprocal(diag)); 8067 PetscCall(VecSqrtAbs(diag)); 8068 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8069 PetscCall(VecDestroy(&diag)); 8070 } 8071 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8072 if (filter >= 0) { 8073 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8074 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8075 } 8076 *a_Gmat = Gmat; 8077 PetscFunctionReturn(PETSC_SUCCESS); 8078 } 8079 8080 /* 8081 Special version for direct calls from Fortran 8082 */ 8083 8084 /* Change these macros so can be used in void function */ 8085 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8086 #undef PetscCall 8087 #define PetscCall(...) \ 8088 do { \ 8089 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8090 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8091 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8092 return; \ 8093 } \ 8094 } while (0) 8095 8096 #undef SETERRQ 8097 #define SETERRQ(comm, ierr, ...) 
\ 8098 do { \ 8099 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8100 return; \ 8101 } while (0) 8102 8103 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8104 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8105 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8106 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8107 #else 8108 #endif 8109 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8110 { 8111 Mat mat = *mmat; 8112 PetscInt m = *mm, n = *mn; 8113 InsertMode addv = *maddv; 8114 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8115 PetscScalar value; 8116 8117 MatCheckPreallocated(mat, 1); 8118 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8119 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8120 { 8121 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8122 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8123 PetscBool roworiented = aij->roworiented; 8124 8125 /* Some Variables required in the macro */ 8126 Mat A = aij->A; 8127 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8128 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8129 MatScalar *aa; 8130 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8131 Mat B = aij->B; 8132 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8133 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8134 MatScalar *ba; 8135 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8136 * cannot use "#if defined" inside a macro. 
*/ 8137 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8138 8139 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8140 PetscInt nonew = a->nonew; 8141 MatScalar *ap1, *ap2; 8142 8143 PetscFunctionBegin; 8144 PetscCall(MatSeqAIJGetArray(A, &aa)); 8145 PetscCall(MatSeqAIJGetArray(B, &ba)); 8146 for (i = 0; i < m; i++) { 8147 if (im[i] < 0) continue; 8148 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8149 if (im[i] >= rstart && im[i] < rend) { 8150 row = im[i] - rstart; 8151 lastcol1 = -1; 8152 rp1 = aj + ai[row]; 8153 ap1 = aa + ai[row]; 8154 rmax1 = aimax[row]; 8155 nrow1 = ailen[row]; 8156 low1 = 0; 8157 high1 = nrow1; 8158 lastcol2 = -1; 8159 rp2 = bj + bi[row]; 8160 ap2 = ba + bi[row]; 8161 rmax2 = bimax[row]; 8162 nrow2 = bilen[row]; 8163 low2 = 0; 8164 high2 = nrow2; 8165 8166 for (j = 0; j < n; j++) { 8167 if (roworiented) value = v[i * n + j]; 8168 else value = v[i + j * m]; 8169 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8170 if (in[j] >= cstart && in[j] < cend) { 8171 col = in[j] - cstart; 8172 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8173 } else if (in[j] < 0) continue; 8174 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8175 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8176 } else { 8177 if (mat->was_assembled) { 8178 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8179 #if defined(PETSC_USE_CTABLE) 8180 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8181 col--; 8182 #else 8183 col = aij->colmap[in[j]] - 1; 8184 #endif 8185 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8186 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8187 col = in[j]; 8188 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8189 B = aij->B; 8190 b = (Mat_SeqAIJ *)B->data; 8191 bimax = b->imax; 8192 bi = b->i; 8193 bilen = b->ilen; 8194 bj = b->j; 8195 rp2 = bj + bi[row]; 8196 ap2 = ba + bi[row]; 8197 rmax2 = bimax[row]; 8198 nrow2 = bilen[row]; 8199 low2 = 0; 8200 high2 = nrow2; 8201 bm = aij->B->rmap->n; 8202 ba = b->a; 8203 inserted = PETSC_FALSE; 8204 } 8205 } else col = in[j]; 8206 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8207 } 8208 } 8209 } else if (!aij->donotstash) { 8210 if (roworiented) { 8211 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8212 } else { 8213 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8214 } 8215 } 8216 } 8217 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8218 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8219 } 8220 PetscFunctionReturnVoid(); 8221 } 8222 8223 /* Undefining these here since they were redefined from their original definition above! No 8224 * other PETSc functions should be defined past this point, as it is impossible to recover the 8225 * original definitions */ 8226 #undef PetscCall 8227 #undef SETERRQ 8228
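
/* Illustrative sketch (uses only standard public MatProduct calls; not specific to this file) of how the backend
   product routines above are typically reached, e.g. for C = P^t A P with device matrix types:

     Mat C;
     PetscCall(MatProductCreate(A, P, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
     PetscCall(MatProductSetFromOptions(C)); // may select MatProductSymbolic_MPIAIJBACKEND()
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
     PetscCall(MatDestroy(&C));
*/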