1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 10 #define TYPE AIJ 11 #define TYPE_AIJ 12 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 13 #undef TYPE 14 #undef TYPE_AIJ 15 16 static PetscErrorCode MatReset_MPIAIJ(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 19 20 PetscFunctionBegin; 21 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 22 PetscCall(MatStashDestroy_Private(&mat->stash)); 23 PetscCall(VecDestroy(&aij->diag)); 24 PetscCall(MatDestroy(&aij->A)); 25 PetscCall(MatDestroy(&aij->B)); 26 #if defined(PETSC_USE_CTABLE) 27 PetscCall(PetscHMapIDestroy(&aij->colmap)); 28 #else 29 PetscCall(PetscFree(aij->colmap)); 30 #endif 31 PetscCall(PetscFree(aij->garray)); 32 PetscCall(VecDestroy(&aij->lvec)); 33 PetscCall(VecScatterDestroy(&aij->Mvctx)); 34 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 35 PetscCall(PetscFree(aij->ld)); 36 PetscFunctionReturn(PETSC_SUCCESS); 37 } 38 39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 42 /* Save the nonzero states of the component matrices because those are what are used to determine 43 the nonzero state of mat */ 44 PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate; 45 46 PetscFunctionBegin; 47 PetscCall(MatReset_MPIAIJ(mat)); 48 PetscCall(MatSetUp_MPI_Hash(mat)); 49 aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate; 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 54 { 55 PetscFunctionBegin; 56 PetscCall(MatReset_MPIAIJ(mat)); 57 58 PetscCall(PetscFree(mat->data)); 59 60 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 61 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 62 63 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 74 #if defined(PETSC_HAVE_CUDA) 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 76 #endif 77 #if defined(PETSC_HAVE_HIP) 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 79 #endif 80 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 139 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 140 for communicators controlling multiple processes. 
   It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. The type also
   automatically switches over to using inodes when enough of them exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix.
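   *
   * A typical call sequence that relies on this (an illustrative sketch, not code taken from this file):
   *   MatBindToCPU(A, PETSC_TRUE);    -- assemble and apply A on the host; lvec and diag follow A
   *   MatMult(A, x, y);               -- no host/device mixing between A and its work vectors
   *   MatBindToCPU(A, PETSC_FALSE);   -- later operations may run on the device again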
*/ 186 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 187 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 192 { 193 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 194 195 PetscFunctionBegin; 196 if (mat->A) { 197 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 198 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 199 } 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 204 { 205 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 207 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 208 const PetscInt *ia, *ib; 209 const MatScalar *aa, *bb, *aav, *bav; 210 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 211 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 212 213 PetscFunctionBegin; 214 *keptrows = NULL; 215 216 ia = a->i; 217 ib = b->i; 218 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 220 for (i = 0; i < m; i++) { 221 na = ia[i + 1] - ia[i]; 222 nb = ib[i + 1] - ib[i]; 223 if (!na && !nb) { 224 cnt++; 225 goto ok1; 226 } 227 aa = aav + ia[i]; 228 for (j = 0; j < na; j++) { 229 if (aa[j] != 0.0) goto ok1; 230 } 231 bb = PetscSafePointerPlusOffset(bav, ib[i]); 232 for (j = 0; j < nb; j++) { 233 if (bb[j] != 0.0) goto ok1; 234 } 235 cnt++; 236 ok1:; 237 } 238 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 239 if (!n0rows) { 240 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 241 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 242 PetscFunctionReturn(PETSC_SUCCESS); 243 } 244 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 245 cnt = 0; 246 for (i = 0; i < m; i++) { 247 na = ia[i + 1] - ia[i]; 248 nb = ib[i + 1] - ib[i]; 249 if (!na && !nb) continue; 250 aa = aav + ia[i]; 251 for (j = 0; j < na; j++) { 252 if (aa[j] != 0.0) { 253 rows[cnt++] = rstart + i; 254 goto ok2; 255 } 256 } 257 bb = PetscSafePointerPlusOffset(bav, ib[i]); 258 for (j = 0; j < nb; j++) { 259 if (bb[j] != 0.0) { 260 rows[cnt++] = rstart + i; 261 goto ok2; 262 } 263 } 264 ok2:; 265 } 266 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 267 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 268 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 275 PetscBool cong; 276 277 PetscFunctionBegin; 278 PetscCall(MatHasCongruentLayouts(Y, &cong)); 279 if (Y->assembled && cong) { 280 PetscCall(MatDiagonalSet(aij->A, D, is)); 281 } else { 282 PetscCall(MatDiagonalSet_Default(Y, D, is)); 283 } 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 288 { 289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 290 PetscInt i, rstart, nrows, *rows; 291 292 PetscFunctionBegin; 293 *zrows = NULL; 294 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 295 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 296 for (i = 0; i < nrows; i++) rows[i] += rstart; 297 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal 
*reductions) 302 { 303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 304 PetscInt i, m, n, *garray = aij->garray; 305 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 306 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 307 PetscReal *work; 308 const PetscScalar *dummy; 309 310 PetscFunctionBegin; 311 PetscCall(MatGetSize(A, &m, &n)); 312 PetscCall(PetscCalloc1(n, &work)); 313 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 314 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 315 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 316 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 317 if (type == NORM_2) { 318 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 319 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 320 } else if (type == NORM_1) { 321 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 322 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 323 } else if (type == NORM_INFINITY) { 324 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 325 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 326 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 327 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 328 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 330 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 331 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 332 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 333 if (type == NORM_INFINITY) { 334 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 335 } else { 336 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 337 } 338 PetscCall(PetscFree(work)); 339 if (type == NORM_2) { 340 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 341 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 342 for (i = 0; i < n; i++) reductions[i] /= m; 343 } 344 PetscFunctionReturn(PETSC_SUCCESS); 345 } 346 347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 348 { 349 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 350 IS sis, gis; 351 const PetscInt *isis, *igis; 352 PetscInt n, *iis, nsis, ngis, rstart, i; 353 354 PetscFunctionBegin; 355 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 356 PetscCall(MatFindNonzeroRows(a->B, &gis)); 357 PetscCall(ISGetSize(gis, &ngis)); 358 PetscCall(ISGetSize(sis, &nsis)); 359 PetscCall(ISGetIndices(sis, &isis)); 360 PetscCall(ISGetIndices(gis, &igis)); 361 362 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 363 PetscCall(PetscArraycpy(iis, igis, ngis)); 364 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 365 n = ngis + nsis; 366 
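  /* Sort the merged candidate list and strip duplicates so each local row index appears at most once before it is
     shifted to global numbering below; e.g. (an assumed illustration) {1, 3} from the diagonal block and {3, 4} from
     the off-diagonal block become {1, 3, 4}. */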
PetscCall(PetscSortRemoveDupsInt(&n, iis)); 367 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 368 for (i = 0; i < n; i++) iis[i] += rstart; 369 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 370 371 PetscCall(ISRestoreIndices(sis, &isis)); 372 PetscCall(ISRestoreIndices(gis, &igis)); 373 PetscCall(ISDestroy(&sis)); 374 PetscCall(ISDestroy(&gis)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 /* 379 Local utility routine that creates a mapping from the global column 380 number to the local number in the off-diagonal part of the local 381 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 382 a slightly higher hash table cost; without it it is not scalable (each processor 383 has an order N integer array but is fast to access. 384 */ 385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 386 { 387 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 388 PetscInt n = aij->B->cmap->n, i; 389 390 PetscFunctionBegin; 391 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 392 #if defined(PETSC_USE_CTABLE) 393 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 394 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 395 #else 396 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 397 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 398 #endif 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 403 do { \ 404 if (col <= lastcol1) low1 = 0; \ 405 else high1 = nrow1; \ 406 lastcol1 = col; \ 407 while (high1 - low1 > 5) { \ 408 t = (low1 + high1) / 2; \ 409 if (rp1[t] > col) high1 = t; \ 410 else low1 = t; \ 411 } \ 412 for (_i = low1; _i < high1; _i++) { \ 413 if (rp1[_i] > col) break; \ 414 if (rp1[_i] == col) { \ 415 if (addv == ADD_VALUES) { \ 416 ap1[_i] += value; \ 417 /* Not sure LogFlops will slow dow the code or not */ \ 418 (void)PetscLogFlops(1.0); \ 419 } else ap1[_i] = value; \ 420 goto a_noinsert; \ 421 } \ 422 } \ 423 if (value == 0.0 && ignorezeroentries && row != col) { \ 424 low1 = 0; \ 425 high1 = nrow1; \ 426 goto a_noinsert; \ 427 } \ 428 if (nonew == 1) { \ 429 low1 = 0; \ 430 high1 = nrow1; \ 431 goto a_noinsert; \ 432 } \ 433 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 434 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 435 N = nrow1++ - 1; \ 436 a->nz++; \ 437 high1++; \ 438 /* shift up all the later entries in this row */ \ 439 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 440 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 441 rp1[_i] = col; \ 442 ap1[_i] = value; \ 443 a_noinsert:; \ 444 ailen[row] = nrow1; \ 445 } while (0) 446 447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 448 do { \ 449 if (col <= lastcol2) low2 = 0; \ 450 else high2 = nrow2; \ 451 lastcol2 = col; \ 452 while (high2 - low2 > 5) { \ 453 t = (low2 + high2) / 2; \ 454 if (rp2[t] > col) high2 = t; \ 455 else low2 = t; \ 456 } \ 457 for (_i = low2; _i < high2; _i++) { \ 458 if (rp2[_i] > col) break; \ 459 if (rp2[_i] == col) { \ 460 if (addv == ADD_VALUES) { \ 461 ap2[_i] += value; \ 462 (void)PetscLogFlops(1.0); \ 463 } else ap2[_i] = value; \ 464 goto b_noinsert; \ 465 } \ 466 
} \ 467 if (value == 0.0 && ignorezeroentries) { \ 468 low2 = 0; \ 469 high2 = nrow2; \ 470 goto b_noinsert; \ 471 } \ 472 if (nonew == 1) { \ 473 low2 = 0; \ 474 high2 = nrow2; \ 475 goto b_noinsert; \ 476 } \ 477 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 478 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 479 N = nrow2++ - 1; \ 480 b->nz++; \ 481 high2++; \ 482 /* shift up all the later entries in this row */ \ 483 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 484 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 485 rp2[_i] = col; \ 486 ap2[_i] = value; \ 487 b_noinsert:; \ 488 bilen[row] = nrow2; \ 489 } while (0) 490 491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 492 { 493 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 494 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 495 PetscInt l, *garray = mat->garray, diag; 496 PetscScalar *aa, *ba; 497 498 PetscFunctionBegin; 499 /* code only works for square matrices A */ 500 501 /* find size of row to the left of the diagonal part */ 502 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 503 row = row - diag; 504 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 505 if (garray[b->j[b->i[row] + l]] > diag) break; 506 } 507 if (l) { 508 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 509 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 510 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 511 } 512 513 /* diagonal part */ 514 if (a->i[row + 1] - a->i[row]) { 515 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 516 PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 517 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 518 } 519 520 /* right of diagonal part */ 521 if (b->i[row + 1] - b->i[row] - l) { 522 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 523 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 524 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 525 } 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 532 PetscScalar value = 0.0; 533 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 540 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 541 PetscBool ignorezeroentries = a->ignorezeroentries; 542 Mat B = aij->B; 543 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 544 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 545 MatScalar *aa, *ba; 546 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 547 PetscInt nonew; 548 MatScalar *ap1, *ap2; 549 550 PetscFunctionBegin; 551 PetscCall(MatSeqAIJGetArray(A, &aa)); 552 PetscCall(MatSeqAIJGetArray(B, &ba)); 553 for (i = 0; i < m; i++) { 554 if (im[i] < 0) continue; 555 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: 
row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 lastcol1 = -1; 559 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 560 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 561 rmax1 = aimax[row]; 562 nrow1 = ailen[row]; 563 low1 = 0; 564 high1 = nrow1; 565 lastcol2 = -1; 566 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 567 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 568 rmax2 = bimax[row]; 569 nrow2 = bilen[row]; 570 low2 = 0; 571 high2 = nrow2; 572 573 for (j = 0; j < n; j++) { 574 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 576 if (in[j] >= cstart && in[j] < cend) { 577 col = in[j] - cstart; 578 nonew = a->nonew; 579 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 580 } else if (in[j] < 0) { 581 continue; 582 } else { 583 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 584 if (mat->was_assembled) { 585 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 586 #if defined(PETSC_USE_CTABLE) 587 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 593 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */ 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ *)B->data; 598 bimax = b->imax; 599 bi = b->i; 600 bilen = b->ilen; 601 bj = b->j; 602 ba = b->a; 603 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 604 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 605 rmax2 = bimax[row]; 606 nrow2 = bilen[row]; 607 low2 = 0; 608 high2 = nrow2; 609 bm = aij->B->rmap->n; 610 ba = b->a; 611 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 612 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 613 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 614 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 615 } 616 } else col = in[j]; 617 nonew = b->nonew; 618 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 619 } 620 } 621 } else { 622 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 623 if (!aij->donotstash) { 624 mat->assembled = PETSC_FALSE; 625 if (roworiented) { 626 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 627 } else { 628 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 629 } 630 } 631 } 632 } 633 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. 
                                              But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
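     full_diag_i[j] and full_offd_i[j] give the preallocated starting offset of row j in the diagonal and off-diagonal
     blocks, so each row is filled at a fixed position. For example (an assumed illustration), with cstart = 4 and
     cend = 8, a row with global columns {2, 5, 9} stores local column 1 (= 5 - 4) in the diagonal block and keeps the
     global columns 2 and 9 in the off-diagonal block for now.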
*/ 698 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 699 PetscScalar *aa = a->a, *ba = b->a; 700 701 PetscFunctionBegin; 702 /* Iterate over all rows of the matrix */ 703 for (j = 0; j < am; j++) { 704 dnz_row = onz_row = 0; 705 rowstart_offd = full_offd_i[j]; 706 rowstart_diag = full_diag_i[j]; 707 /* Iterate over all non-zero columns of the current row */ 708 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 709 /* If column is in the diagonal */ 710 if (mat_j[col] >= cstart && mat_j[col] < cend) { 711 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 712 aa[rowstart_diag + dnz_row] = mat_a[col]; 713 dnz_row++; 714 } else { /* off-diagonal entries */ 715 bj[rowstart_offd + onz_row] = mat_j[col]; 716 ba[rowstart_offd + onz_row] = mat_a[col]; 717 onz_row++; 718 } 719 } 720 ailen[j] = dnz_row; 721 bilen[j] = onz_row; 722 } 723 PetscFunctionReturn(PETSC_SUCCESS); 724 } 725 726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 729 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 730 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 731 732 PetscFunctionBegin; 733 for (i = 0; i < m; i++) { 734 if (idxm[i] < 0) continue; /* negative row */ 735 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 736 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 737 row = idxm[i] - rstart; 738 for (j = 0; j < n; j++) { 739 if (idxn[j] < 0) continue; /* negative column */ 740 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 741 if (idxn[j] >= cstart && idxn[j] < cend) { 742 col = idxn[j] - cstart; 743 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 744 } else { 745 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 746 #if defined(PETSC_USE_CTABLE) 747 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 753 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 754 } 755 } 756 } 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 761 { 762 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 763 PetscInt nstash, reallocs; 764 765 PetscFunctionBegin; 766 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 767 768 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 769 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 770 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 771 PetscFunctionReturn(PETSC_SUCCESS); 772 } 773 774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 775 { 776 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 777 PetscMPIInt n; 778 PetscInt i, j, rstart, ncols, flg; 779 PetscInt *row, *col; 780 PetscBool other_disassembled; 781 PetscScalar *val; 782 783 /* do not use 'b 
= (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 784 785 PetscFunctionBegin; 786 if (!aij->donotstash && !mat->nooffprocentries) { 787 while (1) { 788 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 789 if (!flg) break; 790 791 for (i = 0; i < n;) { 792 /* Now identify the consecutive vals belonging to the same row */ 793 for (j = i, rstart = row[j]; j < n; j++) { 794 if (row[j] != rstart) break; 795 } 796 if (j < n) ncols = j - i; 797 else ncols = n - i; 798 /* Now assemble all these values with a single function call */ 799 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 800 i = j; 801 } 802 } 803 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 804 } 805 #if defined(PETSC_HAVE_DEVICE) 806 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 807 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 808 if (mat->boundtocpu) { 809 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 810 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 811 } 812 #endif 813 PetscCall(MatAssemblyBegin(aij->A, mode)); 814 PetscCall(MatAssemblyEnd(aij->A, mode)); 815 816 /* determine if any processor has disassembled, if so we must 817 also disassemble ourself, in order that we may reassemble. */ 818 /* 819 if nonzero structure of submatrix B cannot change then we know that 820 no processor disassembled thus we can skip this stuff 821 */ 822 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 823 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 824 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 825 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 826 } 827 } 828 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 829 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 830 #if defined(PETSC_HAVE_DEVICE) 831 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 832 #endif 833 PetscCall(MatAssemblyBegin(aij->B, mode)); 834 PetscCall(MatAssemblyEnd(aij->B, mode)); 835 836 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 837 838 aij->rowvalues = NULL; 839 840 PetscCall(VecDestroy(&aij->diag)); 841 842 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 843 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 844 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 845 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 846 } 847 #if defined(PETSC_HAVE_DEVICE) 848 mat->offloadmask = PETSC_OFFLOAD_BOTH; 849 #endif 850 PetscFunctionReturn(PETSC_SUCCESS); 851 } 852 853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 854 { 855 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 856 857 PetscFunctionBegin; 858 PetscCall(MatZeroEntries(l->A)); 859 PetscCall(MatZeroEntries(l->B)); 860 PetscFunctionReturn(PETSC_SUCCESS); 861 } 862 863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 864 { 865 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 866 PetscInt *lrows; 867 PetscInt r, len; 868 
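  /* MatZeroRowsMapLocal_Private() below fills lrows[] with the requested rows this rank owns (in local numbering) and len with their count */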
PetscBool cong; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 873 PetscCall(MatHasCongruentLayouts(A, &cong)); 874 /* fix right-hand side if needed */ 875 if (x && b) { 876 const PetscScalar *xx; 877 PetscScalar *bb; 878 879 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 880 PetscCall(VecGetArrayRead(x, &xx)); 881 PetscCall(VecGetArray(b, &bb)); 882 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 883 PetscCall(VecRestoreArrayRead(x, &xx)); 884 PetscCall(VecRestoreArray(b, &bb)); 885 } 886 887 if (diag != 0.0 && cong) { 888 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 889 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 890 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 891 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 892 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 893 PetscInt nnwA, nnwB; 894 PetscBool nnzA, nnzB; 895 896 nnwA = aijA->nonew; 897 nnwB = aijB->nonew; 898 nnzA = aijA->keepnonzeropattern; 899 nnzB = aijB->keepnonzeropattern; 900 if (!nnzA) { 901 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 902 aijA->nonew = 0; 903 } 904 if (!nnzB) { 905 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 906 aijB->nonew = 0; 907 } 908 /* Must zero here before the next loop */ 909 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 910 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 911 for (r = 0; r < len; ++r) { 912 const PetscInt row = lrows[r] + A->rmap->rstart; 913 if (row >= A->cmap->N) continue; 914 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 915 } 916 aijA->nonew = nnwA; 917 aijB->nonew = nnwB; 918 } else { 919 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 920 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 921 } 922 PetscCall(PetscFree(lrows)); 923 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 924 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 928 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 929 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 930 } 931 PetscFunctionReturn(PETSC_SUCCESS); 932 } 933 934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 935 { 936 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 937 PetscInt n = A->rmap->n; 938 PetscInt i, j, r, m, len = 0; 939 PetscInt *lrows, *owners = A->rmap->range; 940 PetscMPIInt p = 0; 941 PetscSFNode *rrows; 942 PetscSF sf; 943 const PetscScalar *xx; 944 PetscScalar *bb, *mask, *aij_a; 945 Vec xmask, lmask; 946 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 947 const PetscInt *aj, *ii, *ridx; 948 PetscScalar *aa; 949 950 PetscFunctionBegin; 951 /* Create SF where leaves are input rows and roots are owned rows */ 952 PetscCall(PetscMalloc1(n, &lrows)); 953 for (r = 0; r < n; ++r) lrows[r] = -1; 954 PetscCall(PetscMalloc1(N, &rrows)); 955 for (r = 0; r < N; ++r) { 956 
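    /* translate each requested global row into its (owner rank, local index) location; the SF created below uses these as the remote (root) targets of the N leaves */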
const PetscInt idx = rows[r]; 957 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 958 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 959 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 960 } 961 rrows[r].rank = p; 962 rrows[r].index = rows[r] - owners[p]; 963 } 964 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 965 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 966 /* Collect flags for rows to be zeroed */ 967 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 968 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 969 PetscCall(PetscSFDestroy(&sf)); 970 /* Compress and put in row numbers */ 971 for (r = 0; r < n; ++r) 972 if (lrows[r] >= 0) lrows[len++] = r; 973 /* zero diagonal part of matrix */ 974 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 975 /* handle off-diagonal part of matrix */ 976 PetscCall(MatCreateVecs(A, &xmask, NULL)); 977 PetscCall(VecDuplicate(l->lvec, &lmask)); 978 PetscCall(VecGetArray(xmask, &bb)); 979 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 980 PetscCall(VecRestoreArray(xmask, &bb)); 981 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 982 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 983 PetscCall(VecDestroy(&xmask)); 984 if (x && b) { /* this code is buggy when the row and column layout don't match */ 985 PetscBool cong; 986 987 PetscCall(MatHasCongruentLayouts(A, &cong)); 988 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 989 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 990 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 991 PetscCall(VecGetArrayRead(l->lvec, &xx)); 992 PetscCall(VecGetArray(b, &bb)); 993 } 994 PetscCall(VecGetArray(lmask, &mask)); 995 /* remove zeroed rows of off-diagonal matrix */ 996 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 997 ii = aij->i; 998 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 999 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1000 if (aij->compressedrow.use) { 1001 m = aij->compressedrow.nrows; 1002 ii = aij->compressedrow.i; 1003 ridx = aij->compressedrow.rindex; 1004 for (i = 0; i < m; i++) { 1005 n = ii[i + 1] - ii[i]; 1006 aj = aij->j + ii[i]; 1007 aa = aij_a + ii[i]; 1008 1009 for (j = 0; j < n; j++) { 1010 if (PetscAbsScalar(mask[*aj])) { 1011 if (b) bb[*ridx] -= *aa * xx[*aj]; 1012 *aa = 0.0; 1013 } 1014 aa++; 1015 aj++; 1016 } 1017 ridx++; 1018 } 1019 } else { /* do not use compressed row format */ 1020 m = l->B->rmap->n; 1021 for (i = 0; i < m; i++) { 1022 n = ii[i + 1] - ii[i]; 1023 aj = aij->j + ii[i]; 1024 aa = aij_a + ii[i]; 1025 for (j = 0; j < n; j++) { 1026 if (PetscAbsScalar(mask[*aj])) { 1027 if (b) bb[i] -= *aa * xx[*aj]; 1028 *aa = 0.0; 1029 } 1030 aa++; 1031 aj++; 1032 } 1033 } 1034 } 1035 if (x && b) { 1036 PetscCall(VecRestoreArray(b, &bb)); 1037 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1038 } 1039 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1040 PetscCall(VecRestoreArray(lmask, &mask)); 1041 PetscCall(VecDestroy(&lmask)); 1042 PetscCall(PetscFree(lrows)); 1043 1044 /* only change matrix 
nonzero state if pattern was allowed to be changed */ 1045 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1046 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1047 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1048 } 1049 PetscFunctionReturn(PETSC_SUCCESS); 1050 } 1051 1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1055 PetscInt nt; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 PetscCall(VecGetLocalSize(xx, &nt)); 1060 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1061 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1062 PetscUseTypeMethod(a->A, mult, xx, yy); 1063 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1071 1072 PetscFunctionBegin; 1073 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1074 PetscFunctionReturn(PETSC_SUCCESS); 1075 } 1076 1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1078 { 1079 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1080 VecScatter Mvctx = a->Mvctx; 1081 1082 PetscFunctionBegin; 1083 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1084 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1085 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1086 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1087 PetscFunctionReturn(PETSC_SUCCESS); 1088 } 1089 1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1091 { 1092 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1093 1094 PetscFunctionBegin; 1095 /* do nondiagonal part */ 1096 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1097 /* do local part */ 1098 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1099 /* add partial results together */ 1100 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1101 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1102 PetscFunctionReturn(PETSC_SUCCESS); 1103 } 1104 1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1106 { 1107 MPI_Comm comm; 1108 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1109 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1110 IS Me, Notme; 1111 PetscInt M, N, first, last, *notme, i; 1112 PetscBool lf; 1113 PetscMPIInt size; 1114 1115 PetscFunctionBegin; 1116 /* Easy test: symmetric diagonal block */ 1117 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1118 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1119 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1120 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1121 PetscCallMPI(MPI_Comm_size(comm, &size)); 1122 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1123 1124 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
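     Each process forms Aoff = Amat(its owned rows, the complement of its owned range) and
     Boff = Bmat(the complement of its owned range, its owned rows as columns); given the diagonal-block
     check above, Amat is the transpose of Bmat exactly when each Aoff is the transpose of its Boff,
     which the sequential MatIsTranspose() call below verifies.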
*/ 1125 PetscCall(MatGetSize(Amat, &M, &N)); 1126 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1127 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1128 for (i = 0; i < first; i++) notme[i] = i; 1129 for (i = last; i < M; i++) notme[i - last + first] = i; 1130 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1131 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1132 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1133 Aoff = Aoffs[0]; 1134 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1135 Boff = Boffs[0]; 1136 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1137 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1138 PetscCall(MatDestroyMatrices(1, &Boffs)); 1139 PetscCall(ISDestroy(&Me)); 1140 PetscCall(ISDestroy(&Notme)); 1141 PetscCall(PetscFree(notme)); 1142 PetscFunctionReturn(PETSC_SUCCESS); 1143 } 1144 1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1148 1149 PetscFunctionBegin; 1150 /* do nondiagonal part */ 1151 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1152 /* do local part */ 1153 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1154 /* add partial results together */ 1155 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1156 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1157 PetscFunctionReturn(PETSC_SUCCESS); 1158 } 1159 1160 /* 1161 This only works correctly for square matrices where the subblock A->A is the 1162 diagonal block 1163 */ 1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1165 { 1166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1167 1168 PetscFunctionBegin; 1169 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1170 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1171 PetscCall(MatGetDiagonal(a->A, v)); 1172 PetscFunctionReturn(PETSC_SUCCESS); 1173 } 1174 1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1176 { 1177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1178 1179 PetscFunctionBegin; 1180 PetscCall(MatScale(a->A, aa)); 1181 PetscCall(MatScale(a->B, aa)); 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1186 { 1187 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1188 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1189 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1190 const PetscInt *garray = aij->garray; 1191 const PetscScalar *aa, *ba; 1192 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1193 PetscInt64 nz, hnz; 1194 PetscInt *rowlens; 1195 PetscInt *colidxs; 1196 PetscScalar *matvals; 1197 PetscMPIInt rank; 1198 1199 PetscFunctionBegin; 1200 PetscCall(PetscViewerSetUp(viewer)); 1201 1202 M = mat->rmap->N; 1203 N = mat->cmap->N; 1204 m = mat->rmap->n; 1205 rs = mat->rmap->rstart; 1206 cs = mat->cmap->rstart; 1207 nz = A->nz + B->nz; 1208 1209 /* write matrix header */ 1210 header[0] = MAT_FILE_CLASSID; 1211 header[1] = M; 1212 header[2] = N; 1213 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1214 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1215 if (rank == 0) 
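    /* the global nonzero count hnz was reduced onto rank 0 above, so only rank 0 fills in this header entry */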
PetscCall(PetscIntCast(hnz, &header[3])); 1216 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1217 1218 /* fill in and store row lengths */ 1219 PetscCall(PetscMalloc1(m, &rowlens)); 1220 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1221 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1222 PetscCall(PetscFree(rowlens)); 1223 1224 /* fill in and store column indices */ 1225 PetscCall(PetscMalloc1(nz, &colidxs)); 1226 for (cnt = 0, i = 0; i < m; i++) { 1227 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1228 if (garray[B->j[jb]] > cs) break; 1229 colidxs[cnt++] = garray[B->j[jb]]; 1230 } 1231 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1232 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1233 } 1234 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1235 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1236 PetscCall(PetscFree(colidxs)); 1237 1238 /* fill in and store nonzero values */ 1239 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1240 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1241 PetscCall(PetscMalloc1(nz, &matvals)); 1242 for (cnt = 0, i = 0; i < m; i++) { 1243 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 matvals[cnt++] = ba[jb]; 1246 } 1247 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1248 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1249 } 1250 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1251 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1252 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1254 PetscCall(PetscFree(matvals)); 1255 1256 /* write block size option to the viewer's .info file */ 1257 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 1261 #include <petscdraw.h> 1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1263 { 1264 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1265 PetscMPIInt rank = aij->rank, size = aij->size; 1266 PetscBool isdraw, iascii, isbinary; 1267 PetscViewer sviewer; 1268 PetscViewerFormat format; 1269 1270 PetscFunctionBegin; 1271 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1272 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1273 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1274 if (iascii) { 1275 PetscCall(PetscViewerGetFormat(viewer, &format)); 1276 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1277 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1278 PetscCall(PetscMalloc1(size, &nz)); 1279 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1280 for (i = 0; i < size; i++) { 1281 nmax = PetscMax(nmax, nz[i]); 1282 nmin = PetscMin(nmin, nz[i]); 1283 navg += nz[i]; 1284 } 1285 PetscCall(PetscFree(nz)); 1286 navg = navg / size; 1287 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" 
PetscInt_FMT "\n", nmin, navg, nmax)); 1288 PetscFunctionReturn(PETSC_SUCCESS); 1289 } 1290 PetscCall(PetscViewerGetFormat(viewer, &format)); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscInt *inodes = NULL; 1294 1295 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1296 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1297 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1298 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1299 if (!inodes) { 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1301 info.memory)); 1302 } else { 1303 PetscCall( 1304 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1305 } 1306 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1307 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1308 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1310 PetscCall(PetscViewerFlush(viewer)); 1311 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1312 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1313 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1314 PetscFunctionReturn(PETSC_SUCCESS); 1315 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1316 PetscInt inodecount, inodelimit, *inodes; 1317 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1318 if (inodes) { 1319 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1320 } else { 1321 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1322 } 1323 PetscFunctionReturn(PETSC_SUCCESS); 1324 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 } else if (isbinary) { 1328 if (size == 1) { 1329 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1330 PetscCall(MatView(aij->A, viewer)); 1331 } else { 1332 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1333 } 1334 PetscFunctionReturn(PETSC_SUCCESS); 1335 } else if (iascii && size == 1) { 1336 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1337 PetscCall(MatView(aij->A, viewer)); 1338 PetscFunctionReturn(PETSC_SUCCESS); 1339 } else if (isdraw) { 1340 PetscDraw draw; 1341 PetscBool isnull; 1342 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1343 PetscCall(PetscDrawIsNull(draw, &isnull)); 1344 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1345 } 1346 1347 { /* assemble the entire matrix onto first processor */ 1348 Mat A = NULL, Av; 1349 IS isrow, iscol; 1350 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1352 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1353 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1354 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1355 /* The commented code uses MatCreateSubMatrices instead */ 1356 /* 1357 Mat *AA, A = NULL, Av; 1358 IS isrow,iscol; 1359 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1361 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1362 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1363 if (rank == 0) { 1364 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1365 A = AA[0]; 1366 Av = AA[0]; 1367 } 1368 PetscCall(MatDestroySubMatrices(1,&AA)); 1369 */ 1370 PetscCall(ISDestroy(&iscol)); 1371 PetscCall(ISDestroy(&isrow)); 1372 /* 1373 Everyone has to call to draw the matrix since the graphics waits are 1374 synchronized across all processors that share the PetscDraw object 1375 */ 1376 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1377 if (rank == 0) { 1378 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1379 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1380 } 1381 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1382 PetscCall(MatDestroy(&A)); 1383 } 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1388 { 1389 PetscBool iascii, isdraw, issocket, isbinary; 1390 1391 PetscFunctionBegin; 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1395 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1396 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1397 PetscFunctionReturn(PETSC_SUCCESS); 1398 } 1399 1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1401 { 1402 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1403 Vec bb1 = NULL; 1404 PetscBool hasop; 1405 1406 PetscFunctionBegin; 1407 if (flag == SOR_APPLY_UPPER) { 1408 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1409 PetscFunctionReturn(PETSC_SUCCESS); 1410 } 1411 1412 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1413 1414 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 1420 while (its--) { 1421 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1423 1424 /* update rhs: bb1 = bb - B*x */ 1425 PetscCall(VecScale(mat->lvec, -1.0)); 1426 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1427 1428 /* local sweep */ 1429 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1430 } 1431 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1432 if (flag & SOR_ZERO_INITIAL_GUESS) { 1433 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1434 its--; 1435 } 1436 while (its--) { 1437 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1439 1440 /* update rhs: bb1 = bb - B*x */ 1441 PetscCall(VecScale(mat->lvec, -1.0)); 1442 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1443 1444 /* local sweep */ 1445 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1446 } 1447 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1448 if (flag & SOR_ZERO_INITIAL_GUESS) { 1449 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1450 its--; 1451 } 1452 while (its--) { 1453 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1455 1456 /* update rhs: bb1 = bb - B*x */ 1457 PetscCall(VecScale(mat->lvec, -1.0)); 1458 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1459 1460 /* local sweep */ 1461 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1462 } 1463 } else if (flag & SOR_EISENSTAT) { 1464 Vec xx1; 1465 1466 PetscCall(VecDuplicate(bb, &xx1)); 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1468 1469 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1470 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1471 if (!mat->diag) { 1472 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1473 PetscCall(MatGetDiagonal(matin, mat->diag)); 1474 } 1475 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1476 if (hasop) { 1477 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1478 } else { 1479 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1480 } 1481 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1482 1483 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1484 1485 /* local sweep */ 1486 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1487 PetscCall(VecAXPY(xx, 1.0, xx1)); 1488 PetscCall(VecDestroy(&xx1)); 1489 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1490 1491 PetscCall(VecDestroy(&bb1)); 1492 1493 matin->factorerrortype = mat->A->factorerrortype; 1494 PetscFunctionReturn(PETSC_SUCCESS); 1495 } 1496 1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1498 { 1499 Mat aA, aB, Aperm; 1500 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1501 PetscScalar *aa, *ba; 1502 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1503 PetscSF rowsf, sf; 1504 IS parcolp = NULL; 1505 PetscBool done; 1506 1507 PetscFunctionBegin; 1508 PetscCall(MatGetLocalSize(A, &m, &n)); 1509 PetscCall(ISGetIndices(rowp, &rwant)); 1510 PetscCall(ISGetIndices(colp, &cwant)); 1511 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1512 1513 /* Invert row permutation to find out where my rows should go */ 1514 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1515 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1516 PetscCall(PetscSFSetFromOptions(rowsf)); 1517 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1518 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1519 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1520 1521 /* Invert column permutation to find out where my columns should go */ 1522 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1523 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1524 PetscCall(PetscSFSetFromOptions(sf)); 1525 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1526 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1527 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1528 PetscCall(PetscSFDestroy(&sf)); 1529 1530 PetscCall(ISRestoreIndices(rowp, &rwant)); 1531 PetscCall(ISRestoreIndices(colp, &cwant)); 1532 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1533 1534 /* Find out where my gcols should go */ 1535 PetscCall(MatGetSize(aB, NULL, &ng)); 1536 PetscCall(PetscMalloc1(ng, &gcdest)); 1537 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1538 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1539 PetscCall(PetscSFSetFromOptions(sf)); 1540 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1541 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1542 PetscCall(PetscSFDestroy(&sf)); 1543 1544 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1545 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1546 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1547 for (i = 0; i < m; i++) { 1548 PetscInt row = rdest[i]; 1549 PetscMPIInt rowner; 1550 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1551 for (j = ai[i]; j < ai[i + 1]; j++) { 1552 PetscInt col = cdest[aj[j]]; 1553 PetscMPIInt cowner; 1554 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1555 if (rowner == cowner) dnnz[i]++; 1556 else onnz[i]++; 1557 } 1558 for (j = bi[i]; j < bi[i + 1]; j++) { 1559 PetscInt col = gcdest[bj[j]]; 1560 PetscMPIInt cowner; 1561 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1562 if (rowner == cowner) dnnz[i]++; 1563 else onnz[i]++; 1564 } 1565 } 1566 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1568 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1569 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1570 PetscCall(PetscSFDestroy(&rowsf)); 1571 1572 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1573 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1574 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1575 for (i = 0; i < m; i++) { 1576 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1577 PetscInt j0, rowlen; 1578 rowlen = ai[i + 1] - ai[i]; 1579 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1580 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1581 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1582 } 1583 rowlen = bi[i + 1] - bi[i]; 1584 for (j0 = j = 0; j < rowlen; j0 = j) { 1585 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1586 
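/* insert this batch of the off-diagonal row into Aperm: one permuted global row rdest[i], permuted global column indices from gcdest[] (staged in bcols), values taken from the local off-diagonal block */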
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1587 } 1588 } 1589 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1591 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1592 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1593 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1594 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1595 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1596 PetscCall(PetscFree3(work, rdest, cdest)); 1597 PetscCall(PetscFree(gcdest)); 1598 if (parcolp) PetscCall(ISDestroy(&colp)); 1599 *B = Aperm; 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1604 { 1605 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1606 1607 PetscFunctionBegin; 1608 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1609 if (ghosts) *ghosts = aij->garray; 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1614 { 1615 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1616 Mat A = mat->A, B = mat->B; 1617 PetscLogDouble isend[5], irecv[5]; 1618 1619 PetscFunctionBegin; 1620 info->block_size = 1.0; 1621 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1622 1623 isend[0] = info->nz_used; 1624 isend[1] = info->nz_allocated; 1625 isend[2] = info->nz_unneeded; 1626 isend[3] = info->memory; 1627 isend[4] = info->mallocs; 1628 1629 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1630 1631 isend[0] += info->nz_used; 1632 isend[1] += info->nz_allocated; 1633 isend[2] += info->nz_unneeded; 1634 isend[3] += info->memory; 1635 isend[4] += info->mallocs; 1636 if (flag == MAT_LOCAL) { 1637 info->nz_used = isend[0]; 1638 info->nz_allocated = isend[1]; 1639 info->nz_unneeded = isend[2]; 1640 info->memory = isend[3]; 1641 info->mallocs = isend[4]; 1642 } else if (flag == MAT_GLOBAL_MAX) { 1643 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1644 1645 info->nz_used = irecv[0]; 1646 info->nz_allocated = irecv[1]; 1647 info->nz_unneeded = irecv[2]; 1648 info->memory = irecv[3]; 1649 info->mallocs = irecv[4]; 1650 } else if (flag == MAT_GLOBAL_SUM) { 1651 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1652 1653 info->nz_used = irecv[0]; 1654 info->nz_allocated = irecv[1]; 1655 info->nz_unneeded = irecv[2]; 1656 info->memory = irecv[3]; 1657 info->mallocs = irecv[4]; 1658 } 1659 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1660 info->fill_ratio_needed = 0; 1661 info->factor_mallocs = 0; 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1666 { 1667 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1668 1669 PetscFunctionBegin; 1670 switch (op) { 1671 case MAT_NEW_NONZERO_LOCATIONS: 1672 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1673 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1674 case MAT_KEEP_NONZERO_PATTERN: 1675 case MAT_NEW_NONZERO_LOCATION_ERR: 1676 case MAT_USE_INODES: 1677 case MAT_IGNORE_ZERO_ENTRIES: 1678 case MAT_FORM_EXPLICIT_TRANSPOSE: 1679 MatCheckPreallocated(A, 1); 1680 PetscCall(MatSetOption(a->A, op, flg)); 1681 PetscCall(MatSetOption(a->B, op, flg)); 1682 break; 1683 case MAT_ROW_ORIENTED: 1684 MatCheckPreallocated(A, 1); 1685 a->roworiented = flg; 1686 1687 
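/* pass the orientation on to the sequential diagonal and off-diagonal blocks so both interpret MatSetValues() input the same way */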
PetscCall(MatSetOption(a->A, op, flg)); 1688 PetscCall(MatSetOption(a->B, op, flg)); 1689 break; 1690 case MAT_IGNORE_OFF_PROC_ENTRIES: 1691 a->donotstash = flg; 1692 break; 1693 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1694 case MAT_SPD: 1695 case MAT_SYMMETRIC: 1696 case MAT_STRUCTURALLY_SYMMETRIC: 1697 case MAT_HERMITIAN: 1698 case MAT_SYMMETRY_ETERNAL: 1699 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1700 case MAT_SPD_ETERNAL: 1701 /* if the diagonal matrix is square it inherits some of the properties above */ 1702 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1703 break; 1704 case MAT_SUBMAT_SINGLEIS: 1705 A->submat_singleis = flg; 1706 break; 1707 default: 1708 break; 1709 } 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1714 { 1715 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1716 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1717 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1718 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1719 PetscInt *cmap, *idx_p; 1720 1721 PetscFunctionBegin; 1722 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1723 mat->getrowactive = PETSC_TRUE; 1724 1725 if (!mat->rowvalues && (idx || v)) { 1726 /* 1727 allocate enough space to hold information from the longest row. 1728 */ 1729 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1730 PetscInt max = 1, tmp; 1731 for (i = 0; i < matin->rmap->n; i++) { 1732 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1733 if (max < tmp) max = tmp; 1734 } 1735 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1736 } 1737 1738 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1739 lrow = row - rstart; 1740 1741 pvA = &vworkA; 1742 pcA = &cworkA; 1743 pvB = &vworkB; 1744 pcB = &cworkB; 1745 if (!v) { 1746 pvA = NULL; 1747 pvB = NULL; 1748 } 1749 if (!idx) { 1750 pcA = NULL; 1751 if (!v) pcB = NULL; 1752 } 1753 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1754 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1755 nztot = nzA + nzB; 1756 1757 cmap = mat->garray; 1758 if (v || idx) { 1759 if (nztot) { 1760 /* Sort by increasing column numbers, assuming A and B already sorted */ 1761 PetscInt imark = -1; 1762 if (v) { 1763 *v = v_p = mat->rowvalues; 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1766 else break; 1767 } 1768 imark = i; 1769 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1770 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1771 } 1772 if (idx) { 1773 *idx = idx_p = mat->rowindices; 1774 if (imark > -1) { 1775 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1776 } else { 1777 for (i = 0; i < nzB; i++) { 1778 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1779 else break; 1780 } 1781 imark = i; 1782 } 1783 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1784 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1785 } 1786 } else { 1787 if (idx) *idx = NULL; 1788 if (v) *v = NULL; 1789 } 1790 } 1791 *nz = nztot; 1792 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1793 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1794 
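  /*
     A minimal calling sketch from application code (illustrative only; "mat" and "row" are
     placeholders and the row must be owned by the calling rank):

       const PetscInt    *cols;
       const PetscScalar *vals;
       PetscInt           ncols;

       PetscCall(MatGetRow(mat, row, &ncols, &cols, &vals));
       ... read cols[]/vals[]; the diagonal and off-diagonal parts have been merged and sorted by global column ...
       PetscCall(MatRestoreRow(mat, row, &ncols, &cols, &vals));

     The rowvalues/rowindices work arrays allocated above are sized for the longest local row and
     are reused by every such call on this rank.
  */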
PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 1801 PetscFunctionBegin; 1802 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1803 aij->getrowactive = PETSC_FALSE; 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1808 { 1809 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1810 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1811 PetscInt i, j, cstart = mat->cmap->rstart; 1812 PetscReal sum = 0.0; 1813 const MatScalar *v, *amata, *bmata; 1814 1815 PetscFunctionBegin; 1816 if (aij->size == 1) { 1817 PetscCall(MatNorm(aij->A, type, norm)); 1818 } else { 1819 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1820 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1821 if (type == NORM_FROBENIUS) { 1822 v = amata; 1823 for (i = 0; i < amat->nz; i++) { 1824 sum += PetscRealPart(PetscConj(*v) * (*v)); 1825 v++; 1826 } 1827 v = bmata; 1828 for (i = 0; i < bmat->nz; i++) { 1829 sum += PetscRealPart(PetscConj(*v) * (*v)); 1830 v++; 1831 } 1832 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1833 *norm = PetscSqrtReal(*norm); 1834 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1835 } else if (type == NORM_1) { /* max column norm */ 1836 PetscReal *tmp; 1837 PetscInt *jj, *garray = aij->garray; 1838 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1839 *norm = 0.0; 1840 v = amata; 1841 jj = amat->j; 1842 for (j = 0; j < amat->nz; j++) { 1843 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1844 v++; 1845 } 1846 v = bmata; 1847 jj = bmat->j; 1848 for (j = 0; j < bmat->nz; j++) { 1849 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1850 v++; 1851 } 1852 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1853 for (j = 0; j < mat->cmap->N; j++) { 1854 if (tmp[j] > *norm) *norm = tmp[j]; 1855 } 1856 PetscCall(PetscFree(tmp)); 1857 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1858 } else if (type == NORM_INFINITY) { /* max row norm */ 1859 PetscReal ntemp = 0.0; 1860 for (j = 0; j < aij->A->rmap->n; j++) { 1861 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1862 sum = 0.0; 1863 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1864 sum += PetscAbsScalar(*v); 1865 v++; 1866 } 1867 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1868 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1869 sum += PetscAbsScalar(*v); 1870 v++; 1871 } 1872 if (sum > ntemp) ntemp = sum; 1873 } 1874 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1875 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1876 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1877 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1878 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1879 } 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1884 { 1885 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1886 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1887 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, 
*cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1888 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1889 Mat B, A_diag, *B_diag; 1890 const MatScalar *pbv, *bv; 1891 1892 PetscFunctionBegin; 1893 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1894 ma = A->rmap->n; 1895 na = A->cmap->n; 1896 mb = a->B->rmap->n; 1897 nb = a->B->cmap->n; 1898 ai = Aloc->i; 1899 aj = Aloc->j; 1900 bi = Bloc->i; 1901 bj = Bloc->j; 1902 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1903 PetscInt *d_nnz, *g_nnz, *o_nnz; 1904 PetscSFNode *oloc; 1905 PETSC_UNUSED PetscSF sf; 1906 1907 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1908 /* compute d_nnz for preallocation */ 1909 PetscCall(PetscArrayzero(d_nnz, na)); 1910 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1911 /* compute local off-diagonal contributions */ 1912 PetscCall(PetscArrayzero(g_nnz, nb)); 1913 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1914 /* map those to global */ 1915 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1916 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1917 PetscCall(PetscSFSetFromOptions(sf)); 1918 PetscCall(PetscArrayzero(o_nnz, na)); 1919 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1920 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1921 PetscCall(PetscSFDestroy(&sf)); 1922 1923 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1924 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1925 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1926 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1927 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1928 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1929 } else { 1930 B = *matout; 1931 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1932 } 1933 1934 b = (Mat_MPIAIJ *)B->data; 1935 A_diag = a->A; 1936 B_diag = &b->A; 1937 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1938 A_diag_ncol = A_diag->cmap->N; 1939 B_diag_ilen = sub_B_diag->ilen; 1940 B_diag_i = sub_B_diag->i; 1941 1942 /* Set ilen for diagonal of B */ 1943 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1944 1945 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1946 very quickly (=without using MatSetValues), because all writes are local. 
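     The off-diagonal block, by contrast, is inserted below with MatSetValues() one local row of B at a
     time, since its transposed entries generally belong to other ranks and must go through the parallel
     assembly.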
*/ 1947 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1948 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1949 1950 /* copy over the B part */ 1951 PetscCall(PetscMalloc1(bi[mb], &cols)); 1952 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1953 pbv = bv; 1954 row = A->rmap->rstart; 1955 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1956 cols_tmp = cols; 1957 for (i = 0; i < mb; i++) { 1958 ncol = bi[i + 1] - bi[i]; 1959 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1960 row++; 1961 if (pbv) pbv += ncol; 1962 if (cols_tmp) cols_tmp += ncol; 1963 } 1964 PetscCall(PetscFree(cols)); 1965 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1966 1967 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1968 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1969 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1970 *matout = B; 1971 } else { 1972 PetscCall(MatHeaderMerge(A, &B)); 1973 } 1974 PetscFunctionReturn(PETSC_SUCCESS); 1975 } 1976 1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1978 { 1979 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1980 Mat a = aij->A, b = aij->B; 1981 PetscInt s1, s2, s3; 1982 1983 PetscFunctionBegin; 1984 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1985 if (rr) { 1986 PetscCall(VecGetLocalSize(rr, &s1)); 1987 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1988 /* Overlap communication with computation. */ 1989 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1990 } 1991 if (ll) { 1992 PetscCall(VecGetLocalSize(ll, &s1)); 1993 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1994 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1995 } 1996 /* scale the diagonal block */ 1997 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1998 1999 if (rr) { 2000 /* Do a scatter end and then right scale the off-diagonal block */ 2001 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2002 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2003 } 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2010 2011 PetscFunctionBegin; 2012 PetscCall(MatSetUnfactored(a->A)); 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2017 { 2018 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2019 Mat a, b, c, d; 2020 PetscBool flg; 2021 2022 PetscFunctionBegin; 2023 a = matA->A; 2024 b = matA->B; 2025 c = matB->A; 2026 d = matB->B; 2027 2028 PetscCall(MatEqual(a, c, &flg)); 2029 if (flg) PetscCall(MatEqual(b, d, &flg)); 2030 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2035 { 2036 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2037 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2038 2039 PetscFunctionBegin; 2040 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2041 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2042 /* because of the column compression in the off-processor part of the matrix a->B, 2043 the number of columns in a->B and b->B may be different, hence we cannot call 2044 the MatCopy() directly on the two parts. If need be, we can provide a more 2045 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2046 then copying the submatrices */ 2047 PetscCall(MatCopy_Basic(A, B, str)); 2048 } else { 2049 PetscCall(MatCopy(a->A, b->A, str)); 2050 PetscCall(MatCopy(a->B, b->B, str)); 2051 } 2052 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2053 PetscFunctionReturn(PETSC_SUCCESS); 2054 } 2055 2056 /* 2057 Computes the number of nonzeros per row needed for preallocation when X and Y 2058 have different nonzero structure. 2059 */ 2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2061 { 2062 PetscInt i, j, k, nzx, nzy; 2063 2064 PetscFunctionBegin; 2065 /* Set the number of nonzeros in the new matrix */ 2066 for (i = 0; i < m; i++) { 2067 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2068 nzx = xi[i + 1] - xi[i]; 2069 nzy = yi[i + 1] - yi[i]; 2070 nnz[i] = 0; 2071 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2072 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2073 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2074 nnz[i]++; 2075 } 2076 for (; k < nzy; k++) nnz[i]++; 2077 } 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2083 { 2084 PetscInt m = Y->rmap->N; 2085 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2086 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2087 2088 PetscFunctionBegin; 2089 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2094 { 2095 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2096 2097 PetscFunctionBegin; 2098 if (str == SAME_NONZERO_PATTERN) { 2099 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2100 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2101 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2102 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2103 } else { 2104 Mat B; 2105 PetscInt *nnz_d, *nnz_o; 2106 2107 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2108 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2109 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2110 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2111 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2112 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2113 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2114 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2115 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2116 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2117 PetscCall(MatHeaderMerge(Y, &B)); 2118 PetscCall(PetscFree(nnz_d)); 
2119 PetscCall(PetscFree(nnz_o)); 2120 } 2121 PetscFunctionReturn(PETSC_SUCCESS); 2122 } 2123 2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2125 2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2127 { 2128 PetscFunctionBegin; 2129 if (PetscDefined(USE_COMPLEX)) { 2130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2131 2132 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2133 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2134 } 2135 PetscFunctionReturn(PETSC_SUCCESS); 2136 } 2137 2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2139 { 2140 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2141 2142 PetscFunctionBegin; 2143 PetscCall(MatRealPart(a->A)); 2144 PetscCall(MatRealPart(a->B)); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2149 { 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2151 2152 PetscFunctionBegin; 2153 PetscCall(MatImaginaryPart(a->A)); 2154 PetscCall(MatImaginaryPart(a->B)); 2155 PetscFunctionReturn(PETSC_SUCCESS); 2156 } 2157 2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2159 { 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2161 PetscInt i, *idxb = NULL, m = A->rmap->n; 2162 PetscScalar *vv; 2163 Vec vB, vA; 2164 const PetscScalar *va, *vb; 2165 2166 PetscFunctionBegin; 2167 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2168 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2169 2170 PetscCall(VecGetArrayRead(vA, &va)); 2171 if (idx) { 2172 for (i = 0; i < m; i++) { 2173 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2174 } 2175 } 2176 2177 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2178 PetscCall(PetscMalloc1(m, &idxb)); 2179 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2180 2181 PetscCall(VecGetArrayWrite(v, &vv)); 2182 PetscCall(VecGetArrayRead(vB, &vb)); 2183 for (i = 0; i < m; i++) { 2184 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2185 vv[i] = vb[i]; 2186 if (idx) idx[i] = a->garray[idxb[i]]; 2187 } else { 2188 vv[i] = va[i]; 2189 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2190 } 2191 } 2192 PetscCall(VecRestoreArrayWrite(v, &vv)); 2193 PetscCall(VecRestoreArrayRead(vA, &va)); 2194 PetscCall(VecRestoreArrayRead(vB, &vb)); 2195 PetscCall(PetscFree(idxb)); 2196 PetscCall(VecDestroy(&vA)); 2197 PetscCall(VecDestroy(&vB)); 2198 PetscFunctionReturn(PETSC_SUCCESS); 2199 } 2200 2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2204 Vec vB, vA; 2205 2206 PetscFunctionBegin; 2207 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2208 PetscCall(MatGetRowSumAbs(a->A, vA)); 2209 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2210 PetscCall(MatGetRowSumAbs(a->B, vB)); 2211 PetscCall(VecAXPY(vA, 1.0, vB)); 2212 PetscCall(VecDestroy(&vB)); 2213 PetscCall(VecCopy(vA, v)); 2214 PetscCall(VecDestroy(&vA)); 2215 PetscFunctionReturn(PETSC_SUCCESS); 2216 } 2217 2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2219 { 2220 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2221 PetscInt m = A->rmap->n, n = A->cmap->n; 2222 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2223 PetscInt *cmap = mat->garray; 2224 PetscInt *diagIdx, *offdiagIdx; 2225 Vec diagV, offdiagV; 2226 PetscScalar *a, *diagA, *offdiagA; 2227 const PetscScalar *ba, *bav; 2228 PetscInt r, j, col, ncols, *bi, *bj; 2229 Mat B = mat->B; 2230 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2231 2232 PetscFunctionBegin; 2233 /* When a process holds entire A and other 
processes have no entry */ 2234 if (A->cmap->N == n) { 2235 PetscCall(VecGetArrayWrite(v, &diagA)); 2236 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2237 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2238 PetscCall(VecDestroy(&diagV)); 2239 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } else if (n == 0) { 2242 if (m) { 2243 PetscCall(VecGetArrayWrite(v, &a)); 2244 for (r = 0; r < m; r++) { 2245 a[r] = 0.0; 2246 if (idx) idx[r] = -1; 2247 } 2248 PetscCall(VecRestoreArrayWrite(v, &a)); 2249 } 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } 2252 2253 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2255 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2256 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2257 2258 /* Get offdiagIdx[] for implicit 0.0 */ 2259 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2260 ba = bav; 2261 bi = b->i; 2262 bj = b->j; 2263 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2264 for (r = 0; r < m; r++) { 2265 ncols = bi[r + 1] - bi[r]; 2266 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2267 offdiagA[r] = *ba; 2268 offdiagIdx[r] = cmap[0]; 2269 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2270 offdiagA[r] = 0.0; 2271 2272 /* Find first hole in the cmap */ 2273 for (j = 0; j < ncols; j++) { 2274 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2275 if (col > j && j < cstart) { 2276 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2277 break; 2278 } else if (col > j + n && j >= cstart) { 2279 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2280 break; 2281 } 2282 } 2283 if (j == ncols && ncols < A->cmap->N - n) { 2284 /* a hole is outside compressed Bcols */ 2285 if (ncols == 0) { 2286 if (cstart) { 2287 offdiagIdx[r] = 0; 2288 } else offdiagIdx[r] = cend; 2289 } else { /* ncols > 0 */ 2290 offdiagIdx[r] = cmap[ncols - 1] + 1; 2291 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2292 } 2293 } 2294 } 2295 2296 for (j = 0; j < ncols; j++) { 2297 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2298 offdiagA[r] = *ba; 2299 offdiagIdx[r] = cmap[*bj]; 2300 } 2301 ba++; 2302 bj++; 2303 } 2304 } 2305 2306 PetscCall(VecGetArrayWrite(v, &a)); 2307 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2308 for (r = 0; r < m; ++r) { 2309 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2310 a[r] = diagA[r]; 2311 if (idx) idx[r] = cstart + diagIdx[r]; 2312 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2313 a[r] = diagA[r]; 2314 if (idx) { 2315 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2316 idx[r] = cstart + diagIdx[r]; 2317 } else idx[r] = offdiagIdx[r]; 2318 } 2319 } else { 2320 a[r] = offdiagA[r]; 2321 if (idx) idx[r] = offdiagIdx[r]; 2322 } 2323 } 2324 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2325 PetscCall(VecRestoreArrayWrite(v, &a)); 2326 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2327 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2328 PetscCall(VecDestroy(&diagV)); 2329 PetscCall(VecDestroy(&offdiagV)); 2330 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2331 PetscFunctionReturn(PETSC_SUCCESS); 2332 } 2333 2334 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2335 { 2336 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2337 PetscInt m = A->rmap->n, n = A->cmap->n; 2338 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 
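  /* strategy: take the row minima of the diagonal and off-diagonal blocks separately, account for the
     implicit zeros hidden by the compressed off-diagonal column space, then merge the two results */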
2339 PetscInt *cmap = mat->garray; 2340 PetscInt *diagIdx, *offdiagIdx; 2341 Vec diagV, offdiagV; 2342 PetscScalar *a, *diagA, *offdiagA; 2343 const PetscScalar *ba, *bav; 2344 PetscInt r, j, col, ncols, *bi, *bj; 2345 Mat B = mat->B; 2346 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2347 2348 PetscFunctionBegin; 2349 /* When a process holds entire A and other processes have no entry */ 2350 if (A->cmap->N == n) { 2351 PetscCall(VecGetArrayWrite(v, &diagA)); 2352 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2353 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2354 PetscCall(VecDestroy(&diagV)); 2355 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2356 PetscFunctionReturn(PETSC_SUCCESS); 2357 } else if (n == 0) { 2358 if (m) { 2359 PetscCall(VecGetArrayWrite(v, &a)); 2360 for (r = 0; r < m; r++) { 2361 a[r] = PETSC_MAX_REAL; 2362 if (idx) idx[r] = -1; 2363 } 2364 PetscCall(VecRestoreArrayWrite(v, &a)); 2365 } 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } 2368 2369 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2370 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2371 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2372 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2373 2374 /* Get offdiagIdx[] for implicit 0.0 */ 2375 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2376 ba = bav; 2377 bi = b->i; 2378 bj = b->j; 2379 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2380 for (r = 0; r < m; r++) { 2381 ncols = bi[r + 1] - bi[r]; 2382 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2383 offdiagA[r] = *ba; 2384 offdiagIdx[r] = cmap[0]; 2385 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2386 offdiagA[r] = 0.0; 2387 2388 /* Find first hole in the cmap */ 2389 for (j = 0; j < ncols; j++) { 2390 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2391 if (col > j && j < cstart) { 2392 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2393 break; 2394 } else if (col > j + n && j >= cstart) { 2395 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2396 break; 2397 } 2398 } 2399 if (j == ncols && ncols < A->cmap->N - n) { 2400 /* a hole is outside compressed Bcols */ 2401 if (ncols == 0) { 2402 if (cstart) { 2403 offdiagIdx[r] = 0; 2404 } else offdiagIdx[r] = cend; 2405 } else { /* ncols > 0 */ 2406 offdiagIdx[r] = cmap[ncols - 1] + 1; 2407 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2408 } 2409 } 2410 } 2411 2412 for (j = 0; j < ncols; j++) { 2413 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2414 offdiagA[r] = *ba; 2415 offdiagIdx[r] = cmap[*bj]; 2416 } 2417 ba++; 2418 bj++; 2419 } 2420 } 2421 2422 PetscCall(VecGetArrayWrite(v, &a)); 2423 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2424 for (r = 0; r < m; ++r) { 2425 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2426 a[r] = diagA[r]; 2427 if (idx) idx[r] = cstart + diagIdx[r]; 2428 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2429 a[r] = diagA[r]; 2430 if (idx) { 2431 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2432 idx[r] = cstart + diagIdx[r]; 2433 } else idx[r] = offdiagIdx[r]; 2434 } 2435 } else { 2436 a[r] = offdiagA[r]; 2437 if (idx) idx[r] = offdiagIdx[r]; 2438 } 2439 } 2440 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2441 PetscCall(VecRestoreArrayWrite(v, &a)); 2442 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2443 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2444 PetscCall(VecDestroy(&diagV)); 2445 
PetscCall(VecDestroy(&offdiagV)); 2446 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2447 PetscFunctionReturn(PETSC_SUCCESS); 2448 } 2449 2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2451 { 2452 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2453 PetscInt m = A->rmap->n, n = A->cmap->n; 2454 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2455 PetscInt *cmap = mat->garray; 2456 PetscInt *diagIdx, *offdiagIdx; 2457 Vec diagV, offdiagV; 2458 PetscScalar *a, *diagA, *offdiagA; 2459 const PetscScalar *ba, *bav; 2460 PetscInt r, j, col, ncols, *bi, *bj; 2461 Mat B = mat->B; 2462 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2463 2464 PetscFunctionBegin; 2465 /* When a process holds entire A and other processes have no entry */ 2466 if (A->cmap->N == n) { 2467 PetscCall(VecGetArrayWrite(v, &diagA)); 2468 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2469 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2470 PetscCall(VecDestroy(&diagV)); 2471 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2472 PetscFunctionReturn(PETSC_SUCCESS); 2473 } else if (n == 0) { 2474 if (m) { 2475 PetscCall(VecGetArrayWrite(v, &a)); 2476 for (r = 0; r < m; r++) { 2477 a[r] = PETSC_MIN_REAL; 2478 if (idx) idx[r] = -1; 2479 } 2480 PetscCall(VecRestoreArrayWrite(v, &a)); 2481 } 2482 PetscFunctionReturn(PETSC_SUCCESS); 2483 } 2484 2485 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2486 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2487 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2488 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2489 2490 /* Get offdiagIdx[] for implicit 0.0 */ 2491 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2492 ba = bav; 2493 bi = b->i; 2494 bj = b->j; 2495 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2496 for (r = 0; r < m; r++) { 2497 ncols = bi[r + 1] - bi[r]; 2498 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2499 offdiagA[r] = *ba; 2500 offdiagIdx[r] = cmap[0]; 2501 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2502 offdiagA[r] = 0.0; 2503 2504 /* Find first hole in the cmap */ 2505 for (j = 0; j < ncols; j++) { 2506 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2507 if (col > j && j < cstart) { 2508 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2509 break; 2510 } else if (col > j + n && j >= cstart) { 2511 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2512 break; 2513 } 2514 } 2515 if (j == ncols && ncols < A->cmap->N - n) { 2516 /* a hole is outside compressed Bcols */ 2517 if (ncols == 0) { 2518 if (cstart) { 2519 offdiagIdx[r] = 0; 2520 } else offdiagIdx[r] = cend; 2521 } else { /* ncols > 0 */ 2522 offdiagIdx[r] = cmap[ncols - 1] + 1; 2523 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2524 } 2525 } 2526 } 2527 2528 for (j = 0; j < ncols; j++) { 2529 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2530 offdiagA[r] = *ba; 2531 offdiagIdx[r] = cmap[*bj]; 2532 } 2533 ba++; 2534 bj++; 2535 } 2536 } 2537 2538 PetscCall(VecGetArrayWrite(v, &a)); 2539 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2540 for (r = 0; r < m; ++r) { 2541 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2542 a[r] = diagA[r]; 2543 if (idx) idx[r] = cstart + diagIdx[r]; 2544 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2545 a[r] = diagA[r]; 2546 if (idx) { 2547 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2548 idx[r] = cstart + diagIdx[r]; 2549 } else idx[r] = offdiagIdx[r]; 2550 } 2551 } 
else { 2552 a[r] = offdiagA[r]; 2553 if (idx) idx[r] = offdiagIdx[r]; 2554 } 2555 } 2556 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2557 PetscCall(VecRestoreArrayWrite(v, &a)); 2558 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2559 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2560 PetscCall(VecDestroy(&diagV)); 2561 PetscCall(VecDestroy(&offdiagV)); 2562 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2563 PetscFunctionReturn(PETSC_SUCCESS); 2564 } 2565 2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2567 { 2568 Mat *dummy; 2569 2570 PetscFunctionBegin; 2571 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2572 *newmat = *dummy; 2573 PetscCall(PetscFree(dummy)); 2574 PetscFunctionReturn(PETSC_SUCCESS); 2575 } 2576 2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2578 { 2579 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2580 2581 PetscFunctionBegin; 2582 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2583 A->factorerrortype = a->A->factorerrortype; 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2588 { 2589 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2590 2591 PetscFunctionBegin; 2592 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2593 PetscCall(MatSetRandom(aij->A, rctx)); 2594 if (x->assembled) { 2595 PetscCall(MatSetRandom(aij->B, rctx)); 2596 } else { 2597 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2598 } 2599 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2600 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2601 PetscFunctionReturn(PETSC_SUCCESS); 2602 } 2603 2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2605 { 2606 PetscFunctionBegin; 2607 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2608 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2609 PetscFunctionReturn(PETSC_SUCCESS); 2610 } 2611 2612 /*@ 2613 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2614 2615 Not Collective 2616 2617 Input Parameter: 2618 . A - the matrix 2619 2620 Output Parameter: 2621 . 
nz - the number of nonzeros 2622 2623 Level: advanced 2624 2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2626 @*/ 2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2628 { 2629 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2630 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2631 PetscBool isaij; 2632 2633 PetscFunctionBegin; 2634 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2635 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2636 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2637 PetscFunctionReturn(PETSC_SUCCESS); 2638 } 2639 2640 /*@ 2641 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2642 2643 Collective 2644 2645 Input Parameters: 2646 + A - the matrix 2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2648 2649 Level: advanced 2650 2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2652 @*/ 2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2654 { 2655 PetscFunctionBegin; 2656 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2657 PetscFunctionReturn(PETSC_SUCCESS); 2658 } 2659 2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject) 2661 { 2662 PetscBool sc = PETSC_FALSE, flg; 2663 2664 PetscFunctionBegin; 2665 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2666 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2667 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2668 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2669 PetscOptionsHeadEnd(); 2670 PetscFunctionReturn(PETSC_SUCCESS); 2671 } 2672 2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2674 { 2675 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2676 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2677 2678 PetscFunctionBegin; 2679 if (!Y->preallocated) { 2680 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2681 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
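     The code below therefore reserves a single slot per row of the diagonal block as that estimate,
     saving and restoring the block's nonew flag so the caller's policy on new nonzero allocations is
     left untouched.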
*/ 2682 PetscInt nonew = aij->nonew; 2683 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2684 aij->nonew = nonew; 2685 } 2686 PetscCall(MatShift_Basic(Y, a)); 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2691 { 2692 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2693 2694 PetscFunctionBegin; 2695 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2696 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2697 if (d) { 2698 PetscInt rstart; 2699 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2700 *d += rstart; 2701 } 2702 PetscFunctionReturn(PETSC_SUCCESS); 2703 } 2704 2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2708 2709 PetscFunctionBegin; 2710 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2715 { 2716 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2717 2718 PetscFunctionBegin; 2719 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2720 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2725 MatGetRow_MPIAIJ, 2726 MatRestoreRow_MPIAIJ, 2727 MatMult_MPIAIJ, 2728 /* 4*/ MatMultAdd_MPIAIJ, 2729 MatMultTranspose_MPIAIJ, 2730 MatMultTransposeAdd_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*10*/ NULL, 2735 NULL, 2736 NULL, 2737 MatSOR_MPIAIJ, 2738 MatTranspose_MPIAIJ, 2739 /*15*/ MatGetInfo_MPIAIJ, 2740 MatEqual_MPIAIJ, 2741 MatGetDiagonal_MPIAIJ, 2742 MatDiagonalScale_MPIAIJ, 2743 MatNorm_MPIAIJ, 2744 /*20*/ MatAssemblyBegin_MPIAIJ, 2745 MatAssemblyEnd_MPIAIJ, 2746 MatSetOption_MPIAIJ, 2747 MatZeroEntries_MPIAIJ, 2748 /*24*/ MatZeroRows_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 NULL, 2753 /*29*/ MatSetUp_MPI_Hash, 2754 NULL, 2755 NULL, 2756 MatGetDiagonalBlock_MPIAIJ, 2757 NULL, 2758 /*34*/ MatDuplicate_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 /*39*/ MatAXPY_MPIAIJ, 2764 MatCreateSubMatrices_MPIAIJ, 2765 MatIncreaseOverlap_MPIAIJ, 2766 MatGetValues_MPIAIJ, 2767 MatCopy_MPIAIJ, 2768 /*44*/ MatGetRowMax_MPIAIJ, 2769 MatScale_MPIAIJ, 2770 MatShift_MPIAIJ, 2771 MatDiagonalSet_MPIAIJ, 2772 MatZeroRowsColumns_MPIAIJ, 2773 /*49*/ MatSetRandom_MPIAIJ, 2774 MatGetRowIJ_MPIAIJ, 2775 MatRestoreRowIJ_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*54*/ MatFDColoringCreate_MPIXAIJ, 2779 NULL, 2780 MatSetUnfactored_MPIAIJ, 2781 MatPermute_MPIAIJ, 2782 NULL, 2783 /*59*/ MatCreateSubMatrix_MPIAIJ, 2784 MatDestroy_MPIAIJ, 2785 MatView_MPIAIJ, 2786 NULL, 2787 NULL, 2788 /*64*/ NULL, 2789 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2790 NULL, 2791 NULL, 2792 NULL, 2793 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2794 MatGetRowMinAbs_MPIAIJ, 2795 NULL, 2796 NULL, 2797 NULL, 2798 NULL, 2799 /*75*/ MatFDColoringApply_AIJ, 2800 MatSetFromOptions_MPIAIJ, 2801 NULL, 2802 NULL, 2803 MatFindZeroDiagonals_MPIAIJ, 2804 /*80*/ NULL, 2805 NULL, 2806 NULL, 2807 /*83*/ MatLoad_MPIAIJ, 2808 NULL, 2809 NULL, 2810 NULL, 2811 NULL, 2812 NULL, 2813 /*89*/ NULL, 2814 NULL, 2815 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2816 NULL, 2817 NULL, 2818 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2819 NULL, 2820 NULL, 2821 
NULL, 2822 MatBindToCPU_MPIAIJ, 2823 /*99*/ MatProductSetFromOptions_MPIAIJ, 2824 NULL, 2825 NULL, 2826 MatConjugate_MPIAIJ, 2827 NULL, 2828 /*104*/ MatSetValuesRow_MPIAIJ, 2829 MatRealPart_MPIAIJ, 2830 MatImaginaryPart_MPIAIJ, 2831 NULL, 2832 NULL, 2833 /*109*/ NULL, 2834 NULL, 2835 MatGetRowMin_MPIAIJ, 2836 NULL, 2837 MatMissingDiagonal_MPIAIJ, 2838 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2839 NULL, 2840 MatGetGhosts_MPIAIJ, 2841 NULL, 2842 NULL, 2843 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2844 NULL, 2845 NULL, 2846 NULL, 2847 MatGetMultiProcBlock_MPIAIJ, 2848 /*124*/ MatFindNonzeroRows_MPIAIJ, 2849 MatGetColumnReductions_MPIAIJ, 2850 MatInvertBlockDiagonal_MPIAIJ, 2851 MatInvertVariableBlockDiagonal_MPIAIJ, 2852 MatCreateSubMatricesMPI_MPIAIJ, 2853 /*129*/ NULL, 2854 NULL, 2855 NULL, 2856 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2857 NULL, 2858 /*134*/ NULL, 2859 NULL, 2860 NULL, 2861 NULL, 2862 NULL, 2863 /*139*/ MatSetBlockSizes_MPIAIJ, 2864 NULL, 2865 NULL, 2866 MatFDColoringSetUp_MPIXAIJ, 2867 MatFindOffBlockDiagonalEntries_MPIAIJ, 2868 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2869 /*145*/ NULL, 2870 NULL, 2871 NULL, 2872 MatCreateGraph_Simple_AIJ, 2873 NULL, 2874 /*150*/ NULL, 2875 MatEliminateZeros_MPIAIJ, 2876 MatGetRowSumAbs_MPIAIJ, 2877 NULL, 2878 NULL, 2879 /*155*/ NULL, 2880 MatCopyHashToXAIJ_MPI_Hash}; 2881 2882 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2883 { 2884 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2885 2886 PetscFunctionBegin; 2887 PetscCall(MatStoreValues(aij->A)); 2888 PetscCall(MatStoreValues(aij->B)); 2889 PetscFunctionReturn(PETSC_SUCCESS); 2890 } 2891 2892 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2893 { 2894 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2895 2896 PetscFunctionBegin; 2897 PetscCall(MatRetrieveValues(aij->A)); 2898 PetscCall(MatRetrieveValues(aij->B)); 2899 PetscFunctionReturn(PETSC_SUCCESS); 2900 } 2901 2902 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2903 { 2904 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2905 PetscMPIInt size; 2906 2907 PetscFunctionBegin; 2908 if (B->hash_active) { 2909 B->ops[0] = b->cops; 2910 B->hash_active = PETSC_FALSE; 2911 } 2912 PetscCall(PetscLayoutSetUp(B->rmap)); 2913 PetscCall(PetscLayoutSetUp(B->cmap)); 2914 2915 #if defined(PETSC_USE_CTABLE) 2916 PetscCall(PetscHMapIDestroy(&b->colmap)); 2917 #else 2918 PetscCall(PetscFree(b->colmap)); 2919 #endif 2920 PetscCall(PetscFree(b->garray)); 2921 PetscCall(VecDestroy(&b->lvec)); 2922 PetscCall(VecScatterDestroy(&b->Mvctx)); 2923 2924 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2925 2926 MatSeqXAIJGetOptions_Private(b->B); 2927 PetscCall(MatDestroy(&b->B)); 2928 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2929 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2930 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2931 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2932 MatSeqXAIJRestoreOptions_Private(b->B); 2933 2934 MatSeqXAIJGetOptions_Private(b->A); 2935 PetscCall(MatDestroy(&b->A)); 2936 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2937 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2938 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2939 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2940 MatSeqXAIJRestoreOptions_Private(b->A); 2941 2942 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2943 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2944 B->preallocated = PETSC_TRUE; 2945 B->was_assembled = PETSC_FALSE; 2946 B->assembled = PETSC_FALSE; 2947 PetscFunctionReturn(PETSC_SUCCESS); 2948 } 2949 2950 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2951 { 2952 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2953 /* Save the nonzero states of the component matrices because those are what are used to determine 2954 the nonzero state of mat */ 2955 PetscObjectState diagstate = b->A->nonzerostate, offdiagstate = b->B->nonzerostate; 2956 2957 PetscFunctionBegin; 2958 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2959 PetscCall(PetscLayoutSetUp(B->rmap)); 2960 PetscCall(PetscLayoutSetUp(B->cmap)); 2961 if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2962 else { 2963 #if defined(PETSC_USE_CTABLE) 2964 PetscCall(PetscHMapIDestroy(&b->colmap)); 2965 #else 2966 PetscCall(PetscFree(b->colmap)); 2967 #endif 2968 PetscCall(PetscFree(b->garray)); 2969 PetscCall(VecDestroy(&b->lvec)); 2970 } 2971 PetscCall(VecScatterDestroy(&b->Mvctx)); 2972 2973 PetscCall(MatResetPreallocation(b->A)); 2974 PetscCall(MatResetPreallocation(b->B)); 2975 B->preallocated = PETSC_TRUE; 2976 B->was_assembled = PETSC_FALSE; 2977 B->assembled = PETSC_FALSE; 2978 b->A->nonzerostate = ++diagstate, b->B->nonzerostate = ++offdiagstate; 2979 /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */ 2980 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2981 PetscFunctionReturn(PETSC_SUCCESS); 2982 } 2983 2984 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2985 { 2986 Mat mat; 2987 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2988 2989 PetscFunctionBegin; 2990 *newmat = NULL; 2991 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2992 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2993 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2994 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2995 a = (Mat_MPIAIJ *)mat->data; 2996 2997 mat->factortype = matin->factortype; 2998 mat->assembled = matin->assembled; 2999 mat->insertmode = NOT_SET_VALUES; 3000 3001 a->size = oldmat->size; 3002 a->rank = oldmat->rank; 3003 a->donotstash = oldmat->donotstash; 3004 a->roworiented = oldmat->roworiented; 3005 a->rowindices = NULL; 3006 a->rowvalues = NULL; 3007 a->getrowactive = PETSC_FALSE; 3008 3009 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3010 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3011 if (matin->hash_active) { 3012 PetscCall(MatSetUp(mat)); 3013 } else { 3014 mat->preallocated = matin->preallocated; 3015 if (oldmat->colmap) { 3016 #if defined(PETSC_USE_CTABLE) 3017 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3018 #else 3019 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 
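    /* PETSC_USE_CTABLE not defined: colmap is a plain array, so deep-copy the global-to-local column map of the off-diagonal block */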
3020 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3021 #endif 3022 } else a->colmap = NULL; 3023 if (oldmat->garray) { 3024 PetscInt len; 3025 len = oldmat->B->cmap->n; 3026 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3027 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3028 } else a->garray = NULL; 3029 3030 /* It may happen MatDuplicate is called with a non-assembled matrix 3031 In fact, MatDuplicate only requires the matrix to be preallocated 3032 This may happen inside a DMCreateMatrix_Shell */ 3033 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3034 if (oldmat->Mvctx) { 3035 a->Mvctx = oldmat->Mvctx; 3036 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3037 } 3038 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3039 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3040 } 3041 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3042 *newmat = mat; 3043 PetscFunctionReturn(PETSC_SUCCESS); 3044 } 3045 3046 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3047 { 3048 PetscBool isbinary, ishdf5; 3049 3050 PetscFunctionBegin; 3051 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3052 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3053 /* force binary viewer to load .info file if it has not yet done so */ 3054 PetscCall(PetscViewerSetUp(viewer)); 3055 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3056 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3057 if (isbinary) { 3058 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3059 } else if (ishdf5) { 3060 #if defined(PETSC_HAVE_HDF5) 3061 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3062 #else 3063 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3064 #endif 3065 } else { 3066 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3067 } 3068 PetscFunctionReturn(PETSC_SUCCESS); 3069 } 3070 3071 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3072 { 3073 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3074 PetscInt *rowidxs, *colidxs; 3075 PetscScalar *matvals; 3076 3077 PetscFunctionBegin; 3078 PetscCall(PetscViewerSetUp(viewer)); 3079 3080 /* read in matrix header */ 3081 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3082 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3083 M = header[1]; 3084 N = header[2]; 3085 nz = header[3]; 3086 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3087 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3088 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3089 3090 /* set block sizes from the viewer's .info file */ 3091 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3092 /* set global sizes if not set already */ 3093 if (mat->rmap->N < 0) mat->rmap->N = M; 3094 if (mat->cmap->N < 0) mat->cmap->N = N; 3095 PetscCall(PetscLayoutSetUp(mat->rmap)); 
3096 PetscCall(PetscLayoutSetUp(mat->cmap)); 3097 3098 /* check if the matrix sizes are correct */ 3099 PetscCall(MatGetSize(mat, &rows, &cols)); 3100 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3101 3102 /* read in row lengths and build row indices */ 3103 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3104 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3105 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3106 rowidxs[0] = 0; 3107 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3108 if (nz != PETSC_INT_MAX) { 3109 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3110 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3111 } 3112 3113 /* read in column indices and matrix values */ 3114 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3115 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3116 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3117 /* store matrix indices and values */ 3118 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3119 PetscCall(PetscFree(rowidxs)); 3120 PetscCall(PetscFree2(colidxs, matvals)); 3121 PetscFunctionReturn(PETSC_SUCCESS); 3122 } 3123 3124 /* Not scalable because of ISAllGather() unless getting all columns. */ 3125 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3126 { 3127 IS iscol_local; 3128 PetscBool isstride; 3129 PetscMPIInt gisstride = 0; 3130 3131 PetscFunctionBegin; 3132 /* check if we are grabbing all columns*/ 3133 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3134 3135 if (isstride) { 3136 PetscInt start, len, mstart, mlen; 3137 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3138 PetscCall(ISGetLocalSize(iscol, &len)); 3139 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3140 if (mstart == start && mlen - mstart == len) gisstride = 1; 3141 } 3142 3143 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3144 if (gisstride) { 3145 PetscInt N; 3146 PetscCall(MatGetSize(mat, NULL, &N)); 3147 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3148 PetscCall(ISSetIdentity(iscol_local)); 3149 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3150 } else { 3151 PetscInt cbs; 3152 PetscCall(ISGetBlockSize(iscol, &cbs)); 3153 PetscCall(ISAllGather(iscol, &iscol_local)); 3154 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3155 } 3156 3157 *isseq = iscol_local; 3158 PetscFunctionReturn(PETSC_SUCCESS); 3159 } 3160 3161 /* 3162 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3163 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3164 3165 Input Parameters: 3166 + mat - matrix 3167 . 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3168 i.e., mat->rstart <= isrow[i] < mat->rend 3169 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3170 i.e., mat->cstart <= iscol[i] < mat->cend 3171 3172 Output Parameters: 3173 + isrow_d - sequential row index set for retrieving mat->A 3174 . iscol_d - sequential column index set for retrieving mat->A 3175 . iscol_o - sequential column index set for retrieving mat->B 3176 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3177 */ 3178 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3179 { 3180 Vec x, cmap; 3181 const PetscInt *is_idx; 3182 PetscScalar *xarray, *cmaparray; 3183 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3184 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3185 Mat B = a->B; 3186 Vec lvec = a->lvec, lcmap; 3187 PetscInt i, cstart, cend, Bn = B->cmap->N; 3188 MPI_Comm comm; 3189 VecScatter Mvctx = a->Mvctx; 3190 3191 PetscFunctionBegin; 3192 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3193 PetscCall(ISGetLocalSize(iscol, &ncols)); 3194 3195 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3196 PetscCall(MatCreateVecs(mat, &x, NULL)); 3197 PetscCall(VecSet(x, -1.0)); 3198 PetscCall(VecDuplicate(x, &cmap)); 3199 PetscCall(VecSet(cmap, -1.0)); 3200 3201 /* Get start indices */ 3202 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3203 isstart -= ncols; 3204 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3205 3206 PetscCall(ISGetIndices(iscol, &is_idx)); 3207 PetscCall(VecGetArray(x, &xarray)); 3208 PetscCall(VecGetArray(cmap, &cmaparray)); 3209 PetscCall(PetscMalloc1(ncols, &idx)); 3210 for (i = 0; i < ncols; i++) { 3211 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3212 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3213 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3214 } 3215 PetscCall(VecRestoreArray(x, &xarray)); 3216 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3217 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3218 3219 /* Get iscol_d */ 3220 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3221 PetscCall(ISGetBlockSize(iscol, &i)); 3222 PetscCall(ISSetBlockSize(*iscol_d, i)); 3223 3224 /* Get isrow_d */ 3225 PetscCall(ISGetLocalSize(isrow, &m)); 3226 rstart = mat->rmap->rstart; 3227 PetscCall(PetscMalloc1(m, &idx)); 3228 PetscCall(ISGetIndices(isrow, &is_idx)); 3229 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3230 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3231 3232 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3233 PetscCall(ISGetBlockSize(isrow, &i)); 3234 PetscCall(ISSetBlockSize(*isrow_d, i)); 3235 3236 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3237 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3238 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3239 3240 PetscCall(VecDuplicate(lvec, &lcmap)); 3241 3242 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3243 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3244 3245 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3246 /* off-process column 
indices */ 3247 count = 0; 3248 PetscCall(PetscMalloc1(Bn, &idx)); 3249 PetscCall(PetscMalloc1(Bn, &cmap1)); 3250 3251 PetscCall(VecGetArray(lvec, &xarray)); 3252 PetscCall(VecGetArray(lcmap, &cmaparray)); 3253 for (i = 0; i < Bn; i++) { 3254 if (PetscRealPart(xarray[i]) > -1.0) { 3255 idx[count] = i; /* local column index in off-diagonal part B */ 3256 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3257 count++; 3258 } 3259 } 3260 PetscCall(VecRestoreArray(lvec, &xarray)); 3261 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3262 3263 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3264 /* cannot ensure iscol_o has same blocksize as iscol! */ 3265 3266 PetscCall(PetscFree(idx)); 3267 *garray = cmap1; 3268 3269 PetscCall(VecDestroy(&x)); 3270 PetscCall(VecDestroy(&cmap)); 3271 PetscCall(VecDestroy(&lcmap)); 3272 PetscFunctionReturn(PETSC_SUCCESS); 3273 } 3274 3275 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3276 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3277 { 3278 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3279 Mat M = NULL; 3280 MPI_Comm comm; 3281 IS iscol_d, isrow_d, iscol_o; 3282 Mat Asub = NULL, Bsub = NULL; 3283 PetscInt n, count, M_size, N_size; 3284 3285 PetscFunctionBegin; 3286 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3287 3288 if (call == MAT_REUSE_MATRIX) { 3289 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3290 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3291 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3292 3293 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3294 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3295 3296 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3297 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3298 3299 /* Update diagonal and off-diagonal portions of submat */ 3300 asub = (Mat_MPIAIJ *)(*submat)->data; 3301 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3302 PetscCall(ISGetLocalSize(iscol_o, &n)); 3303 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3304 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3305 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3306 3307 } else { /* call == MAT_INITIAL_MATRIX) */ 3308 PetscInt *garray, *garray_compact; 3309 PetscInt BsubN; 3310 3311 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3312 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3313 3314 /* Create local submatrices Asub and Bsub */ 3315 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3316 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3317 3318 // Compact garray so its not of size Bn 3319 PetscCall(ISGetSize(iscol_o, &count)); 3320 PetscCall(PetscMalloc1(count, &garray_compact)); 3321 PetscCall(PetscArraycpy(garray_compact, garray, count)); 3322 3323 /* Create submatrix M */ 3324 PetscCall(ISGetSize(isrow, &M_size)); 3325 PetscCall(ISGetSize(iscol, &N_size)); 3326 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M)); 3327 3328 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3329 asub = (Mat_MPIAIJ *)M->data; 3330 3331 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3332 n = asub->B->cmap->N; 3333 if (BsubN > n) { 3334 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3335 const PetscInt *idx; 3336 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3337 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3338 3339 PetscCall(PetscMalloc1(n, &idx_new)); 3340 j = 0; 3341 PetscCall(ISGetIndices(iscol_o, &idx)); 3342 for (i = 0; i < n; i++) { 3343 if (j >= BsubN) break; 3344 while (subgarray[i] > garray[j]) j++; 3345 3346 if (subgarray[i] == garray[j]) { 3347 idx_new[i] = idx[j++]; 3348 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3349 } 3350 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3351 3352 PetscCall(ISDestroy(&iscol_o)); 3353 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3354 3355 } else if (BsubN < n) { 3356 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3357 } 3358 3359 PetscCall(PetscFree(garray)); 3360 *submat = M; 3361 3362 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3363 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3364 PetscCall(ISDestroy(&isrow_d)); 3365 3366 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3367 PetscCall(ISDestroy(&iscol_d)); 3368 3369 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3370 PetscCall(ISDestroy(&iscol_o)); 3371 } 3372 PetscFunctionReturn(PETSC_SUCCESS); 3373 } 3374 3375 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3376 { 3377 IS iscol_local = NULL, isrow_d; 3378 PetscInt csize; 3379 PetscInt n, i, j, start, end; 3380 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3381 MPI_Comm comm; 3382 3383 PetscFunctionBegin; 3384 /* If isrow has same processor distribution as mat, 3385 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3386 if (call == MAT_REUSE_MATRIX) { 3387 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3388 if (isrow_d) { 3389 sameRowDist = PETSC_TRUE; 3390 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3391 } else { 3392 
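  /* no isrow_d was cached by MatCreateSubMatrix_MPIAIJ_SameRowColDist(); check whether a sub-column IS ("SubIScol") was cached by MatCreateSubMatrix_MPIAIJ_SameRowDist() instead */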
PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3393 if (iscol_local) { 3394 sameRowDist = PETSC_TRUE; 3395 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3396 } 3397 } 3398 } else { 3399 /* Check if isrow has same processor distribution as mat */ 3400 sameDist[0] = PETSC_FALSE; 3401 PetscCall(ISGetLocalSize(isrow, &n)); 3402 if (!n) { 3403 sameDist[0] = PETSC_TRUE; 3404 } else { 3405 PetscCall(ISGetMinMax(isrow, &i, &j)); 3406 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3407 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3408 } 3409 3410 /* Check if iscol has same processor distribution as mat */ 3411 sameDist[1] = PETSC_FALSE; 3412 PetscCall(ISGetLocalSize(iscol, &n)); 3413 if (!n) { 3414 sameDist[1] = PETSC_TRUE; 3415 } else { 3416 PetscCall(ISGetMinMax(iscol, &i, &j)); 3417 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3418 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3419 } 3420 3421 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3422 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3423 sameRowDist = tsameDist[0]; 3424 } 3425 3426 if (sameRowDist) { 3427 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3428 /* isrow and iscol have same processor distribution as mat */ 3429 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3430 PetscFunctionReturn(PETSC_SUCCESS); 3431 } else { /* sameRowDist */ 3432 /* isrow has same processor distribution as mat */ 3433 if (call == MAT_INITIAL_MATRIX) { 3434 PetscBool sorted; 3435 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3436 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3437 PetscCall(ISGetSize(iscol, &i)); 3438 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3439 3440 PetscCall(ISSorted(iscol_local, &sorted)); 3441 if (sorted) { 3442 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3443 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3444 PetscFunctionReturn(PETSC_SUCCESS); 3445 } 3446 } else { /* call == MAT_REUSE_MATRIX */ 3447 IS iscol_sub; 3448 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3449 if (iscol_sub) { 3450 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3451 PetscFunctionReturn(PETSC_SUCCESS); 3452 } 3453 } 3454 } 3455 } 3456 3457 /* General case: iscol -> iscol_local which has global size of iscol */ 3458 if (call == MAT_REUSE_MATRIX) { 3459 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3460 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3461 } else { 3462 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3463 } 3464 3465 PetscCall(ISGetLocalSize(iscol, &csize)); 3466 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3467 3468 if (call == MAT_INITIAL_MATRIX) { 3469 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3470 PetscCall(ISDestroy(&iscol_local)); 3471 } 3472 PetscFunctionReturn(PETSC_SUCCESS); 3473 } 3474 3475 /*@C 3476 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using 
`MATSEQAIJ` matrices that contain the "diagonal" 3477 and "off-diagonal" part of the matrix in CSR format. 3478 3479 Collective 3480 3481 Input Parameters: 3482 + comm - MPI communicator 3483 . M - the global row size 3484 . N - the global column size 3485 . A - "diagonal" portion of matrix 3486 . B - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray 3487 - garray - either `NULL` or the global index of `B` columns 3488 3489 Output Parameter: 3490 . mat - the matrix, with input `A` as its local diagonal matrix 3491 3492 Level: advanced 3493 3494 Notes: 3495 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3496 3497 `A` and `B` becomes part of output mat. The user cannot use `A` and `B` anymore. 3498 3499 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3500 @*/ 3501 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat) 3502 { 3503 PetscInt m, n; 3504 MatType mpi_mat_type; 3505 3506 PetscFunctionBegin; 3507 PetscCall(MatCreate(comm, mat)); 3508 PetscCall(MatGetSize(A, &m, &n)); 3509 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3510 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3511 3512 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3513 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3514 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3515 PetscCall(MatSetType(*mat, mpi_mat_type)); 3516 3517 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3518 3519 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3520 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3521 PetscCall(MatSetMPIAIJWithSplitSeqAIJ(*mat, A, B, garray)); 3522 PetscFunctionReturn(PETSC_SUCCESS); 3523 } 3524 3525 /* 3526 MatSetMPIAIJWithSplitSeqAIJ - Set the diag and offdiag matrices of a `MATMPIAIJ` matrix. 3527 It is similar to `MatCreateMPIAIJWithSplitArrays()`. This routine allows passing in 3528 B with local indices and the correct size, along with the accompanying 3529 garray, hence skipping compactification 3530 3531 Collective 3532 3533 Input Parameters: 3534 + mat - the MATMPIAIJ matrix, which should have its type and layout set, but should not have its diag, offdiag matrices set 3535 . A - the diag matrix using local col ids 3536 . B - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray 3537 - garray - either `NULL` or the global index of `B` columns 3538 3539 Output Parameter: 3540 . mat - the updated `MATMPIAIJ` matrix 3541 3542 Level: advanced 3543 3544 Notes: 3545 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3546 3547 `A` and `B` become part of output mat. The user cannot use `A` and `B` anymore. 
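   A minimal calling sketch (illustrative only; it mirrors what `MatCreateMPIAIJWithSeqAIJ()` does and assumes `comm`, the sizes, `Adiag`, `Boffd`, and `garray` were prepared beforehand):
.vb
   Mat C;
   PetscCall(MatCreate(comm, &C));
   PetscCall(MatSetSizes(C, m, n, M, N));
   PetscCall(MatSetType(C, MATMPIAIJ));
   PetscCall(PetscLayoutSetUp(C->rmap));
   PetscCall(PetscLayoutSetUp(C->cmap));
   PetscCall(MatSetMPIAIJWithSplitSeqAIJ(C, Adiag, Boffd, garray));
.ve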
3548 3549 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3550 */ 3551 PETSC_INTERN PetscErrorCode MatSetMPIAIJWithSplitSeqAIJ(Mat mat, Mat A, Mat B, PetscInt *garray) 3552 { 3553 PetscFunctionBegin; 3554 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 3555 PetscInt m, n, M, N, Am, An, Bm, Bn; 3556 3557 PetscCall(MatGetSize(mat, &M, &N)); 3558 PetscCall(MatGetLocalSize(mat, &m, &n)); 3559 PetscCall(MatGetLocalSize(A, &Am, &An)); 3560 PetscCall(MatGetLocalSize(B, &Bm, &Bn)); 3561 3562 PetscCheck(m == Am && m == Bm, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of rows do not match"); 3563 PetscCheck(n == An, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of columns do not match"); 3564 PetscCheck(!mpiaij->A && !mpiaij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "A, B of the MPIAIJ matrix are not empty"); 3565 mpiaij->A = A; 3566 mpiaij->B = B; 3567 mpiaij->garray = garray; 3568 3569 mat->preallocated = PETSC_TRUE; 3570 mat->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */ 3571 3572 PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3573 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3574 /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and 3575 also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced 3576 */ 3577 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3578 PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3579 PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3580 PetscFunctionReturn(PETSC_SUCCESS); 3581 } 3582 3583 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3584 3585 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3586 { 3587 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3588 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3589 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3590 Mat M, Msub, B = a->B; 3591 MatScalar *aa; 3592 Mat_SeqAIJ *aij; 3593 PetscInt *garray = a->garray, *colsub, Ncols; 3594 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3595 IS iscol_sub, iscmap; 3596 const PetscInt *is_idx, *cmap; 3597 PetscBool allcolumns = PETSC_FALSE; 3598 MPI_Comm comm; 3599 3600 PetscFunctionBegin; 3601 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3602 if (call == MAT_REUSE_MATRIX) { 3603 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3604 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3605 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3606 3607 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3608 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3609 3610 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3611 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3612 3613 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3614 3615 } else { /* call == MAT_INITIAL_MATRIX) */ 3616 PetscBool flg; 3617 3618 PetscCall(ISGetLocalSize(iscol, 
&n)); 3619 PetscCall(ISGetSize(iscol, &Ncols)); 3620 3621 /* (1) iscol -> nonscalable iscol_local */ 3622 /* Check for special case: each processor gets entire matrix columns */ 3623 PetscCall(ISIdentity(iscol_local, &flg)); 3624 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3625 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3626 if (allcolumns) { 3627 iscol_sub = iscol_local; 3628 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3629 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3630 3631 } else { 3632 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3633 PetscInt *idx, *cmap1, k; 3634 PetscCall(PetscMalloc1(Ncols, &idx)); 3635 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3636 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3637 count = 0; 3638 k = 0; 3639 for (i = 0; i < Ncols; i++) { 3640 j = is_idx[i]; 3641 if (j >= cstart && j < cend) { 3642 /* diagonal part of mat */ 3643 idx[count] = j; 3644 cmap1[count++] = i; /* column index in submat */ 3645 } else if (Bn) { 3646 /* off-diagonal part of mat */ 3647 if (j == garray[k]) { 3648 idx[count] = j; 3649 cmap1[count++] = i; /* column index in submat */ 3650 } else if (j > garray[k]) { 3651 while (j > garray[k] && k < Bn - 1) k++; 3652 if (j == garray[k]) { 3653 idx[count] = j; 3654 cmap1[count++] = i; /* column index in submat */ 3655 } 3656 } 3657 } 3658 } 3659 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3660 3661 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3662 PetscCall(ISGetBlockSize(iscol, &cbs)); 3663 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3664 3665 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3666 } 3667 3668 /* (3) Create sequential Msub */ 3669 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3670 } 3671 3672 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3673 aij = (Mat_SeqAIJ *)Msub->data; 3674 ii = aij->i; 3675 PetscCall(ISGetIndices(iscmap, &cmap)); 3676 3677 /* 3678 m - number of local rows 3679 Ncols - number of columns (same on all processors) 3680 rstart - first row in new global matrix generated 3681 */ 3682 PetscCall(MatGetSize(Msub, &m, NULL)); 3683 3684 if (call == MAT_INITIAL_MATRIX) { 3685 /* (4) Create parallel newmat */ 3686 PetscMPIInt rank, size; 3687 PetscInt csize; 3688 3689 PetscCallMPI(MPI_Comm_size(comm, &size)); 3690 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3691 3692 /* 3693 Determine the number of non-zeros in the diagonal and off-diagonal 3694 portions of the matrix in order to do correct preallocation 3695 */ 3696 3697 /* first get start and end of "diagonal" columns */ 3698 PetscCall(ISGetLocalSize(iscol, &csize)); 3699 if (csize == PETSC_DECIDE) { 3700 PetscCall(ISGetSize(isrow, &mglobal)); 3701 if (mglobal == Ncols) { /* square matrix */ 3702 nlocal = m; 3703 } else { 3704 nlocal = Ncols / size + ((Ncols % size) > rank); 3705 } 3706 } else { 3707 nlocal = csize; 3708 } 3709 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3710 rstart = rend - nlocal; 3711 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3712 3713 /* next, compute all the lengths */ 3714 jj = aij->j; 3715 
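  /* for each local row of Msub, count how many entries map (through cmap) into the new diagonal column range [rstart, rend) versus outside it; dlens/olens feed the preallocation below */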
PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3716 olens = dlens + m; 3717 for (i = 0; i < m; i++) { 3718 jend = ii[i + 1] - ii[i]; 3719 olen = 0; 3720 dlen = 0; 3721 for (j = 0; j < jend; j++) { 3722 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3723 else dlen++; 3724 jj++; 3725 } 3726 olens[i] = olen; 3727 dlens[i] = dlen; 3728 } 3729 3730 PetscCall(ISGetBlockSize(isrow, &bs)); 3731 PetscCall(ISGetBlockSize(iscol, &cbs)); 3732 3733 PetscCall(MatCreate(comm, &M)); 3734 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3735 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3736 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3737 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3738 PetscCall(PetscFree(dlens)); 3739 3740 } else { /* call == MAT_REUSE_MATRIX */ 3741 M = *newmat; 3742 PetscCall(MatGetLocalSize(M, &i, NULL)); 3743 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3744 PetscCall(MatZeroEntries(M)); 3745 /* 3746 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3747 rather than the slower MatSetValues(). 3748 */ 3749 M->was_assembled = PETSC_TRUE; 3750 M->assembled = PETSC_FALSE; 3751 } 3752 3753 /* (5) Set values of Msub to *newmat */ 3754 PetscCall(PetscMalloc1(count, &colsub)); 3755 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3756 3757 jj = aij->j; 3758 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3759 for (i = 0; i < m; i++) { 3760 row = rstart + i; 3761 nz = ii[i + 1] - ii[i]; 3762 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3763 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3764 jj += nz; 3765 aa += nz; 3766 } 3767 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3768 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3769 3770 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3771 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3772 3773 PetscCall(PetscFree(colsub)); 3774 3775 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3776 if (call == MAT_INITIAL_MATRIX) { 3777 *newmat = M; 3778 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3779 PetscCall(MatDestroy(&Msub)); 3780 3781 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3782 PetscCall(ISDestroy(&iscol_sub)); 3783 3784 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3785 PetscCall(ISDestroy(&iscmap)); 3786 3787 if (iscol_local) { 3788 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3789 PetscCall(ISDestroy(&iscol_local)); 3790 } 3791 } 3792 PetscFunctionReturn(PETSC_SUCCESS); 3793 } 3794 3795 /* 3796 Not great since it makes two copies of the submatrix, first an SeqAIJ 3797 in local and then by concatenating the local matrices the end result. 3798 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3799 3800 This requires a sequential iscol with all indices. 
3801 */ 3802 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3803 { 3804 PetscMPIInt rank, size; 3805 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3806 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3807 Mat M, Mreuse; 3808 MatScalar *aa, *vwork; 3809 MPI_Comm comm; 3810 Mat_SeqAIJ *aij; 3811 PetscBool colflag, allcolumns = PETSC_FALSE; 3812 3813 PetscFunctionBegin; 3814 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3815 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3816 PetscCallMPI(MPI_Comm_size(comm, &size)); 3817 3818 /* Check for special case: each processor gets entire matrix columns */ 3819 PetscCall(ISIdentity(iscol, &colflag)); 3820 PetscCall(ISGetLocalSize(iscol, &n)); 3821 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3822 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3823 3824 if (call == MAT_REUSE_MATRIX) { 3825 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3826 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3827 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3828 } else { 3829 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3830 } 3831 3832 /* 3833 m - number of local rows 3834 n - number of columns (same on all processors) 3835 rstart - first row in new global matrix generated 3836 */ 3837 PetscCall(MatGetSize(Mreuse, &m, &n)); 3838 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3839 if (call == MAT_INITIAL_MATRIX) { 3840 aij = (Mat_SeqAIJ *)Mreuse->data; 3841 ii = aij->i; 3842 jj = aij->j; 3843 3844 /* 3845 Determine the number of non-zeros in the diagonal and off-diagonal 3846 portions of the matrix in order to do correct preallocation 3847 */ 3848 3849 /* first get start and end of "diagonal" columns */ 3850 if (csize == PETSC_DECIDE) { 3851 PetscCall(ISGetSize(isrow, &mglobal)); 3852 if (mglobal == n) { /* square matrix */ 3853 nlocal = m; 3854 } else { 3855 nlocal = n / size + ((n % size) > rank); 3856 } 3857 } else { 3858 nlocal = csize; 3859 } 3860 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3861 rstart = rend - nlocal; 3862 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3863 3864 /* next, compute all the lengths */ 3865 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3866 olens = dlens + m; 3867 for (i = 0; i < m; i++) { 3868 jend = ii[i + 1] - ii[i]; 3869 olen = 0; 3870 dlen = 0; 3871 for (j = 0; j < jend; j++) { 3872 if (*jj < rstart || *jj >= rend) olen++; 3873 else dlen++; 3874 jj++; 3875 } 3876 olens[i] = olen; 3877 dlens[i] = dlen; 3878 } 3879 PetscCall(MatCreate(comm, &M)); 3880 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3881 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3882 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3883 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3884 PetscCall(PetscFree(dlens)); 3885 } else { 3886 PetscInt ml, nl; 3887 3888 M = *newmat; 3889 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3890 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3891 PetscCall(MatZeroEntries(M)); 3892 /* 3893 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3894 rather than the slower MatSetValues(). 3895 */ 3896 M->was_assembled = PETSC_TRUE; 3897 M->assembled = PETSC_FALSE; 3898 } 3899 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3900 aij = (Mat_SeqAIJ *)Mreuse->data; 3901 ii = aij->i; 3902 jj = aij->j; 3903 3904 /* trigger copy to CPU if needed */ 3905 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3906 for (i = 0; i < m; i++) { 3907 row = rstart + i; 3908 nz = ii[i + 1] - ii[i]; 3909 cwork = jj; 3910 jj = PetscSafePointerPlusOffset(jj, nz); 3911 vwork = aa; 3912 aa = PetscSafePointerPlusOffset(aa, nz); 3913 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3914 } 3915 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3916 3917 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3918 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3919 *newmat = M; 3920 3921 /* save submatrix used in processor for next request */ 3922 if (call == MAT_INITIAL_MATRIX) { 3923 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3924 PetscCall(MatDestroy(&Mreuse)); 3925 } 3926 PetscFunctionReturn(PETSC_SUCCESS); 3927 } 3928 3929 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3930 { 3931 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3932 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3933 const PetscInt *JJ; 3934 PetscBool nooffprocentries; 3935 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3936 3937 PetscFunctionBegin; 3938 PetscCall(PetscLayoutSetUp(B->rmap)); 3939 PetscCall(PetscLayoutSetUp(B->cmap)); 3940 m = B->rmap->n; 3941 cstart = B->cmap->rstart; 3942 cend = B->cmap->rend; 3943 rstart = B->rmap->rstart; 3944 irstart = Ii[0]; 3945 3946 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3947 3948 if (PetscDefined(USE_DEBUG)) { 3949 for (i = 0; i < m; i++) { 3950 nnz = Ii[i + 1] - Ii[i]; 3951 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3952 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3953 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3954 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3955 } 3956 } 3957 3958 for (i = 0; i < m; i++) { 3959 nnz = Ii[i + 1] - Ii[i]; 3960 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3961 nnz_max = PetscMax(nnz_max, nnz); 3962 d = 0; 3963 for (j = 0; j < nnz; j++) { 3964 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3965 } 3966 d_nnz[i] = d; 3967 o_nnz[i] = nnz - d; 3968 } 3969 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3970 PetscCall(PetscFree2(d_nnz, o_nnz)); 3971 3972 for (i = 0; i < m; i++) { 3973 ii = i + rstart; 3974 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3975 } 3976 nooffprocentries = B->nooffprocentries; 3977 B->nooffprocentries = PETSC_TRUE; 3978 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3979 
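  /* all entries above were inserted into locally owned rows, so with nooffprocentries temporarily set the assembly performs no off-process communication */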
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3980 B->nooffprocentries = nooffprocentries; 3981 3982 /* count number of entries below block diagonal */ 3983 PetscCall(PetscFree(Aij->ld)); 3984 PetscCall(PetscCalloc1(m, &ld)); 3985 Aij->ld = ld; 3986 for (i = 0; i < m; i++) { 3987 nnz = Ii[i + 1] - Ii[i]; 3988 j = 0; 3989 while (j < nnz && J[j] < cstart) j++; 3990 ld[i] = j; 3991 if (J) J += nnz; 3992 } 3993 3994 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3995 PetscFunctionReturn(PETSC_SUCCESS); 3996 } 3997 3998 /*@ 3999 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 4000 (the default parallel PETSc format). 4001 4002 Collective 4003 4004 Input Parameters: 4005 + B - the matrix 4006 . i - the indices into `j` for the start of each local row (indices start with zero) 4007 . j - the column indices for each local row (indices start with zero) 4008 - v - optional values in the matrix 4009 4010 Level: developer 4011 4012 Notes: 4013 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 4014 thus you CANNOT change the matrix entries by changing the values of `v` after you have 4015 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4016 4017 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4018 4019 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 4020 4021 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 4022 4023 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 4024 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4025 4026 The format which is used for the sparse matrix input, is equivalent to a 4027 row-major ordering.. i.e for the following matrix, the input data expected is 4028 as shown 4029 .vb 4030 1 0 0 4031 2 0 3 P0 4032 ------- 4033 4 5 6 P1 4034 4035 Process0 [P0] rows_owned=[0,1] 4036 i = {0,1,3} [size = nrow+1 = 2+1] 4037 j = {0,0,2} [size = 3] 4038 v = {1,2,3} [size = 3] 4039 4040 Process1 [P1] rows_owned=[2] 4041 i = {0,3} [size = nrow+1 = 1+1] 4042 j = {0,1,2} [size = 3] 4043 v = {4,5,6} [size = 3] 4044 .ve 4045 4046 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4047 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4048 @*/ 4049 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4050 { 4051 PetscFunctionBegin; 4052 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4053 PetscFunctionReturn(PETSC_SUCCESS); 4054 } 4055 4056 /*@ 4057 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4058 (the default parallel PETSc format). For good matrix assembly performance 4059 the user should preallocate the matrix storage by setting the parameters 4060 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4061 4062 Collective 4063 4064 Input Parameters: 4065 + B - the matrix 4066 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4067 (same value is used for all local rows) 4068 . d_nnz - array containing the number of nonzeros in the various rows of the 4069 DIAGONAL portion of the local submatrix (possibly different for each row) 4070 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4071 The size of this array is equal to the number of local rows, i.e., 'm'. 4072 For matrices that will be factored, you must leave room for (and set) 4073 the diagonal entry even if it is zero. 4074 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4075 submatrix (same value is used for all local rows). 4076 - o_nnz - array containing the number of nonzeros in the various rows of the 4077 OFF-DIAGONAL portion of the local submatrix (possibly different for 4078 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4079 structure. The size of this array is equal to the number 4080 of local rows, i.e., 'm'. 4081 4082 Example Usage: 4083 Consider the following 8x8 matrix with 34 non-zero values, that is 4084 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4085 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4086 as follows 4087 4088 .vb 4089 1 2 0 | 0 3 0 | 0 4 4090 Proc0 0 5 6 | 7 0 0 | 8 0 4091 9 0 10 | 11 0 0 | 12 0 4092 ------------------------------------- 4093 13 0 14 | 15 16 17 | 0 0 4094 Proc1 0 18 0 | 19 20 21 | 0 0 4095 0 0 0 | 22 23 0 | 24 0 4096 ------------------------------------- 4097 Proc2 25 26 27 | 0 0 28 | 29 0 4098 30 0 0 | 31 32 33 | 0 34 4099 .ve 4100 4101 This can be represented as a collection of submatrices as 4102 .vb 4103 A B C 4104 D E F 4105 G H I 4106 .ve 4107 4108 Where the submatrices A,B,C are owned by proc0, D,E,F are 4109 owned by proc1, G,H,I are owned by proc2. 4110 4111 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4112 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4113 The 'M','N' parameters are 8,8, and have the same values on all procs. 4114 4115 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4116 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4117 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4118 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4119 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4120 matrix, and [DF] as another `MATSEQAIJ` matrix. 4121 4122 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4123 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4124 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4125 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4126 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4127 In this case, the values of `d_nz`, `o_nz` are 4128 .vb 4129 proc0 d_nz = 2, o_nz = 2 4130 proc1 d_nz = 3, o_nz = 2 4131 proc2 d_nz = 1, o_nz = 4 4132 .ve 4133 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4134 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4135 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4136 34 values. 4137 4138 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4139 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4140 In the above case the values for `d_nnz`, `o_nnz` are 4141 .vb 4142 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4143 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4144 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4145 .ve 4146 Here the space allocated is the sum of all the above values, i.e., 34, and 4147 hence the pre-allocation is perfect. 4148 4149 Level: intermediate 4150 4151 Notes: 4152 If the *_nnz parameter is given then the *_nz parameter is ignored. 4153 4154 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4155 storage. The stored row and column indices begin with zero. 4156 See [Sparse Matrices](sec_matsparse) for details. 4157 4158 The parallel matrix is partitioned such that the first m0 rows belong to 4159 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4160 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 4161 4162 The DIAGONAL portion of the local submatrix of a processor can be defined 4163 as the submatrix which is obtained by extracting the part corresponding to 4164 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4165 first row that belongs to the processor, r2 is the last row belonging to 4166 this processor, and c1-c2 is the range of indices of the local part of a 4167 vector suitable for applying the matrix to. This is an mxn matrix. In the 4168 common case of a square matrix, the row and column ranges are the same and 4169 the DIAGONAL part is also square. The remaining portion of the local 4170 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4171 4172 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4173 4174 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4175 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded. 4176 You can also run with the option `-info` and look for messages with the string 4177 malloc in them to see if additional memory allocation was needed. 4178 4179 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4180 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4181 @*/ 4182 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4183 { 4184 PetscFunctionBegin; 4185 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4186 PetscValidType(B, 1); 4187 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4188 PetscFunctionReturn(PETSC_SUCCESS); 4189 } 4190 4191 /*@ 4192 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4193 CSR format. 4194 4195 Collective 4196 4197 Input Parameters: 4198 + comm - MPI communicator 4199 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4200 . n - This value should be the same as the local size used in creating the 4201 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4202 calculated if `N` is given) For square matrices n is almost always `m`. 4203 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4204 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4205 .
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4206 . j - global column indices 4207 - a - optional matrix values 4208 4209 Output Parameter: 4210 . mat - the matrix 4211 4212 Level: intermediate 4213 4214 Notes: 4215 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4216 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4217 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4218 4219 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4220 4221 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4222 4223 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4224 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4225 4226 The format which is used for the sparse matrix input, is equivalent to a 4227 row-major ordering, i.e., for the following matrix, the input data expected is 4228 as shown 4229 .vb 4230 1 0 0 4231 2 0 3 P0 4232 ------- 4233 4 5 6 P1 4234 4235 Process0 [P0] rows_owned=[0,1] 4236 i = {0,1,3} [size = nrow+1 = 2+1] 4237 j = {0,0,2} [size = 3] 4238 v = {1,2,3} [size = 3] 4239 4240 Process1 [P1] rows_owned=[2] 4241 i = {0,3} [size = nrow+1 = 1+1] 4242 j = {0,1,2} [size = 3] 4243 v = {4,5,6} [size = 3] 4244 .ve 4245 4246 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4247 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4248 @*/ 4249 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4250 { 4251 PetscFunctionBegin; 4252 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4253 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4254 PetscCall(MatCreate(comm, mat)); 4255 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4256 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4257 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4258 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4259 PetscFunctionReturn(PETSC_SUCCESS); 4260 } 4261 4262 /*@ 4263 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4264 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4265 from `MatCreateMPIAIJWithArrays()` 4266 4267 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4268 4269 Collective 4270 4271 Input Parameters: 4272 + mat - the matrix 4273 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4274 . n - This value should be the same as the local size used in creating the 4275 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4276 calculated if N is given) For square matrices n is almost always m. 4277 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4278 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4279 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4280 . J - column indices 4281 - v - matrix values 4282 4283 Level: deprecated 4284 4285 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4286 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4287 @*/ 4288 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4289 { 4290 PetscInt nnz, i; 4291 PetscBool nooffprocentries; 4292 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4293 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4294 PetscScalar *ad, *ao; 4295 PetscInt ldi, Iii, md; 4296 const PetscInt *Adi = Ad->i; 4297 PetscInt *ld = Aij->ld; 4298 4299 PetscFunctionBegin; 4300 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4301 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4302 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4303 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4304 4305 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4306 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4307 4308 for (i = 0; i < m; i++) { 4309 if (PetscDefined(USE_DEBUG)) { 4310 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4311 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4312 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4313 } 4314 } 4315 nnz = Ii[i + 1] - Ii[i]; 4316 Iii = Ii[i]; 4317 ldi = ld[i]; 4318 md = Adi[i + 1] - Adi[i]; 4319 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4320 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4321 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4322 ad += md; 4323 ao += nnz - md; 4324 } 4325 nooffprocentries = mat->nooffprocentries; 4326 mat->nooffprocentries = PETSC_TRUE; 4327 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4328 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4329 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4330 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4331 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4332 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4333 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4334 mat->nooffprocentries = nooffprocentries; 4335 PetscFunctionReturn(PETSC_SUCCESS); 4336 } 4337 4338 /*@ 4339 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4340 4341 Collective 4342 4343 Input Parameters: 4344 + mat - the matrix 4345 - v - matrix values, stored by row 4346 4347 Level: intermediate 4348 4349 Notes: 4350 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4351 4352 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4353 4354 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4355 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4356 @*/ 4357 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4358 { 4359 PetscInt nnz, i, m; 4360 PetscBool nooffprocentries; 4361 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4362 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4363 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4364 PetscScalar *ad, *ao; 4365 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4366 PetscInt ldi, Iii, md; 4367 PetscInt *ld = Aij->ld; 4368 4369 PetscFunctionBegin; 4370 m = mat->rmap->n; 4371 4372 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4373 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4374 Iii = 0; 4375 for (i = 0; i < m; i++) { 4376 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4377 ldi = ld[i]; 4378 md = Adi[i + 1] - Adi[i]; 4379 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4380 ad += md; 4381 if (ao) { 4382 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4383 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4384 ao += nnz - md; 4385 } 4386 Iii += nnz; 4387 } 4388 nooffprocentries = mat->nooffprocentries; 4389 mat->nooffprocentries = PETSC_TRUE; 4390 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4391 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4392 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4393 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4394 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4395 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4396 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4397 mat->nooffprocentries = nooffprocentries; 4398 PetscFunctionReturn(PETSC_SUCCESS); 4399 } 4400 4401 /*@ 4402 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4403 (the default parallel PETSc format). For good matrix assembly performance 4404 the user should preallocate the matrix storage by setting the parameters 4405 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4406 4407 Collective 4408 4409 Input Parameters: 4410 + comm - MPI communicator 4411 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4412 This value should be the same as the local size used in creating the 4413 y vector for the matrix-vector product y = Ax. 4414 . n - This value should be the same as the local size used in creating the 4415 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4416 calculated if N is given) For square matrices n is almost always m. 4417 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4418 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4419 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4420 (same value is used for all local rows) 4421 . d_nnz - array containing the number of nonzeros in the various rows of the 4422 DIAGONAL portion of the local submatrix (possibly different for each row) 4423 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4424 The size of this array is equal to the number of local rows, i.e 'm'. 4425 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
        submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
        OFF-DIAGONAL portion of the local submatrix (possibly different for
        each row) or `NULL`, if `o_nz` is used to specify the nonzero
        structure. The size of this array is equal to the number
        of local rows, i.e. `m`.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_inode - Do not use inodes
. -mat_inode_limit <limit> - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
        See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
        to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored.

  The `m`, `n`, `M`, `N` parameters specify the size of the matrix, and its partitioning across
  processors, while the `d_nz`, `d_nnz`, `o_nz`, `o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
  `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first `m0` rows belong to process 0, the next `m1` rows belong to
  process 1, the next `m2` rows belong to process 2, etc., where
  `m0`, `m1`, `m2`, ... are the input parameter `m` on each MPI process. That is, each MPI process stores
  values corresponding to an [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to the 0th partition, the next n1 columns belonging to the next
  partition, etc., where n0, n1, n2, ... are the input parameter `n`.

  The DIAGONAL portion of the local submatrix on any given processor
  is the submatrix corresponding to the rows and columns `m`, `n`
  owned by that processor; i.e., the diagonal submatrix on
  process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
  etc. The remaining [m x (N-n)] portion of the local submatrix
  constitutes the OFF-DIAGONAL portion. The example below
  illustrates this concept. The two matrices, the DIAGONAL portion and
  the OFF-DIAGONAL portion, are each stored as `MATSEQAIJ` matrices.

  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  When calling this routine with a single process communicator, a matrix of
  type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
  type of communicator, use the construction mechanism
.vb
  MatCreate(..., &A);
  MatSetType(A, MATMPIAIJ);
  MatSetSizes(A, m, n, M, N);
  MatMPIAIJSetPreallocation(A, ...);
.ve

  By default, this format uses inodes (identical nodes) when possible.
  We search for consecutive rows with the same nonzero structure, thereby
  reusing matrix information to achieve increased efficiency.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let us assume that proc0 owns 3 rows,
  proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  where the submatrices A, B, C are owned by proc0, D, E, F are
  owned by proc1, and G, H, I are owned by proc2.

  The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
  The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
  The 'M', 'N' parameters are 8, 8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
  for proc2; i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence the preallocation is perfect.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
          `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not Collective

  Input Parameter:
. A - The `MATMPIAIJ` matrix

  Output Parameters:
+ Ad - The local diagonal block as a `MATSEQAIJ` matrix
. Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

  Level: intermediate

  Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
  local column numbers to global column numbers in the original matrix.
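
  Example Usage:
  A minimal access sketch (a hypothetical snippet, assuming `A` is an already assembled `MATMPIAIJ` matrix; the
  variable names are for illustration only)
.vb
  Mat             Ad, Ao;
  const PetscInt *colmap;
  PetscInt        nr, nco;

  MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
  MatGetLocalSize(Ad, &nr, NULL);  /* Nr: number of local rows (rows of both Ad and Ao) */
  MatGetSize(Ao, NULL, &nco);      /* Nco: number of nonzero off-process columns; colmap[] has Nco entries */
  /* Ad, Ao, and colmap are internal to A; do not destroy or free them */
.ve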
4616 4617 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4618 @*/ 4619 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4620 { 4621 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4622 PetscBool flg; 4623 4624 PetscFunctionBegin; 4625 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4626 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4627 if (Ad) *Ad = a->A; 4628 if (Ao) *Ao = a->B; 4629 if (colmap) *colmap = a->garray; 4630 PetscFunctionReturn(PETSC_SUCCESS); 4631 } 4632 4633 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4634 { 4635 PetscInt m, N, i, rstart, nnz, Ii; 4636 PetscInt *indx; 4637 PetscScalar *values; 4638 MatType rootType; 4639 4640 PetscFunctionBegin; 4641 PetscCall(MatGetSize(inmat, &m, &N)); 4642 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4643 PetscInt *dnz, *onz, sum, bs, cbs; 4644 4645 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4646 /* Check sum(n) = N */ 4647 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4648 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4649 4650 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4651 rstart -= m; 4652 4653 MatPreallocateBegin(comm, m, n, dnz, onz); 4654 for (i = 0; i < m; i++) { 4655 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4656 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4657 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4658 } 4659 4660 PetscCall(MatCreate(comm, outmat)); 4661 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4662 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4663 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4664 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4665 PetscCall(MatSetType(*outmat, rootType)); 4666 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4667 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4668 MatPreallocateEnd(dnz, onz); 4669 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4670 } 4671 4672 /* numeric phase */ 4673 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4674 for (i = 0; i < m; i++) { 4675 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4676 Ii = i + rstart; 4677 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4678 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4679 } 4680 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4681 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4682 PetscFunctionReturn(PETSC_SUCCESS); 4683 } 4684 4685 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4686 { 4687 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4688 4689 PetscFunctionBegin; 4690 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4691 PetscCall(PetscFree(merge->id_r)); 4692 PetscCall(PetscFree(merge->len_s)); 4693 PetscCall(PetscFree(merge->len_r)); 4694 PetscCall(PetscFree(merge->bi)); 4695 PetscCall(PetscFree(merge->bj)); 4696 PetscCall(PetscFree(merge->buf_ri[0])); 4697 PetscCall(PetscFree(merge->buf_ri)); 4698 PetscCall(PetscFree(merge->buf_rj[0])); 4699 PetscCall(PetscFree(merge->buf_rj)); 4700 
PetscCall(PetscFree(merge->coi)); 4701 PetscCall(PetscFree(merge->coj)); 4702 PetscCall(PetscFree(merge->owners_co)); 4703 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4704 PetscCall(PetscFree(merge)); 4705 PetscFunctionReturn(PETSC_SUCCESS); 4706 } 4707 4708 #include <../src/mat/utils/freespace.h> 4709 #include <petscbt.h> 4710 4711 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4712 { 4713 MPI_Comm comm; 4714 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4715 PetscMPIInt size, rank, taga, *len_s; 4716 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4717 PetscMPIInt proc, k; 4718 PetscInt **buf_ri, **buf_rj; 4719 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4720 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4721 MPI_Request *s_waits, *r_waits; 4722 MPI_Status *status; 4723 const MatScalar *aa, *a_a; 4724 MatScalar **abuf_r, *ba_i; 4725 Mat_Merge_SeqsToMPI *merge; 4726 PetscContainer container; 4727 4728 PetscFunctionBegin; 4729 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4730 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4731 4732 PetscCallMPI(MPI_Comm_size(comm, &size)); 4733 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4734 4735 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4736 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4737 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4738 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4739 aa = a_a; 4740 4741 bi = merge->bi; 4742 bj = merge->bj; 4743 buf_ri = merge->buf_ri; 4744 buf_rj = merge->buf_rj; 4745 4746 PetscCall(PetscMalloc1(size, &status)); 4747 owners = merge->rowmap->range; 4748 len_s = merge->len_s; 4749 4750 /* send and recv matrix values */ 4751 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4752 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4753 4754 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4755 for (proc = 0, k = 0; proc < size; proc++) { 4756 if (!len_s[proc]) continue; 4757 i = owners[proc]; 4758 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4759 k++; 4760 } 4761 4762 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4763 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4764 PetscCall(PetscFree(status)); 4765 4766 PetscCall(PetscFree(s_waits)); 4767 PetscCall(PetscFree(r_waits)); 4768 4769 /* insert mat values of mpimat */ 4770 PetscCall(PetscMalloc1(N, &ba_i)); 4771 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4772 4773 for (k = 0; k < merge->nrecv; k++) { 4774 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4775 nrows = *buf_ri_k[k]; 4776 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4777 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4778 } 4779 4780 /* set values of ba */ 4781 m = merge->rowmap->n; 4782 for (i = 0; i < m; i++) { 4783 arow = owners[rank] + i; 4784 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4785 bnzi = bi[i + 1] - bi[i]; 4786 PetscCall(PetscArrayzero(ba_i, bnzi)); 4787 4788 /* add local non-zero vals of this proc's seqmat into ba */ 4789 anzi = ai[arow + 1] - ai[arow]; 4790 aj = a->j + ai[arow]; 4791 aa = 
a_a + ai[arow]; 4792 nextaj = 0; 4793 for (j = 0; nextaj < anzi; j++) { 4794 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4795 ba_i[j] += aa[nextaj++]; 4796 } 4797 } 4798 4799 /* add received vals into ba */ 4800 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4801 /* i-th row */ 4802 if (i == *nextrow[k]) { 4803 anzi = *(nextai[k] + 1) - *nextai[k]; 4804 aj = buf_rj[k] + *nextai[k]; 4805 aa = abuf_r[k] + *nextai[k]; 4806 nextaj = 0; 4807 for (j = 0; nextaj < anzi; j++) { 4808 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4809 ba_i[j] += aa[nextaj++]; 4810 } 4811 } 4812 nextrow[k]++; 4813 nextai[k]++; 4814 } 4815 } 4816 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4817 } 4818 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4819 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4820 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4821 4822 PetscCall(PetscFree(abuf_r[0])); 4823 PetscCall(PetscFree(abuf_r)); 4824 PetscCall(PetscFree(ba_i)); 4825 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4826 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4827 PetscFunctionReturn(PETSC_SUCCESS); 4828 } 4829 4830 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4831 { 4832 Mat B_mpi; 4833 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4834 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4835 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4836 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4837 PetscInt len, *dnz, *onz, bs, cbs; 4838 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4839 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4840 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4841 MPI_Status *status; 4842 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4843 PetscBT lnkbt; 4844 Mat_Merge_SeqsToMPI *merge; 4845 PetscContainer container; 4846 4847 PetscFunctionBegin; 4848 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4849 4850 /* make sure it is a PETSc comm */ 4851 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4852 PetscCallMPI(MPI_Comm_size(comm, &size)); 4853 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4854 4855 PetscCall(PetscNew(&merge)); 4856 PetscCall(PetscMalloc1(size, &status)); 4857 4858 /* determine row ownership */ 4859 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4860 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4861 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4862 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4863 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4864 PetscCall(PetscMalloc1(size, &len_si)); 4865 PetscCall(PetscMalloc1(size, &merge->len_s)); 4866 4867 m = merge->rowmap->n; 4868 owners = merge->rowmap->range; 4869 4870 /* determine the number of messages to send, their lengths */ 4871 len_s = merge->len_s; 4872 4873 len = 0; /* length of buf_si[] */ 4874 merge->nsend = 0; 4875 for (PetscMPIInt proc = 0; proc < size; proc++) { 4876 len_si[proc] = 0; 4877 if (proc == rank) { 4878 len_s[proc] = 0; 4879 } else { 4880 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4881 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4882 } 4883 if (len_s[proc]) { 4884 merge->nsend++; 4885 nrows = 0; 4886 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4887 if (ai[i + 1] > ai[i]) nrows++; 4888 } 
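      /* an i-structure message carries one count (nrows), the nrows indices of the nonempty rows,
         and nrows+1 row offsets, hence 2*(nrows+1) integers in total */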
4889 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4890 len += len_si[proc]; 4891 } 4892 } 4893 4894 /* determine the number and length of messages to receive for ij-structure */ 4895 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4896 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4897 4898 /* post the Irecv of j-structure */ 4899 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4900 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4901 4902 /* post the Isend of j-structure */ 4903 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4904 4905 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4906 if (!len_s[proc]) continue; 4907 i = owners[proc]; 4908 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4909 k++; 4910 } 4911 4912 /* receives and sends of j-structure are complete */ 4913 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4914 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4915 4916 /* send and recv i-structure */ 4917 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4918 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4919 4920 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4921 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4922 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4923 if (!len_s[proc]) continue; 4924 /* form outgoing message for i-structure: 4925 buf_si[0]: nrows to be sent 4926 [1:nrows]: row index (global) 4927 [nrows+1:2*nrows+1]: i-structure index 4928 */ 4929 nrows = len_si[proc] / 2 - 1; 4930 buf_si_i = buf_si + nrows + 1; 4931 buf_si[0] = nrows; 4932 buf_si_i[0] = 0; 4933 nrows = 0; 4934 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4935 anzi = ai[i + 1] - ai[i]; 4936 if (anzi) { 4937 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4938 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4939 nrows++; 4940 } 4941 } 4942 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4943 k++; 4944 buf_si += len_si[proc]; 4945 } 4946 4947 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4948 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4949 4950 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4951 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4952 4953 PetscCall(PetscFree(len_si)); 4954 PetscCall(PetscFree(len_ri)); 4955 PetscCall(PetscFree(rj_waits)); 4956 PetscCall(PetscFree2(si_waits, sj_waits)); 4957 PetscCall(PetscFree(ri_waits)); 4958 PetscCall(PetscFree(buf_s)); 4959 PetscCall(PetscFree(status)); 4960 4961 /* compute a local seq matrix in each processor */ 4962 /* allocate bi array and free space for accumulating nonzero column info */ 4963 PetscCall(PetscMalloc1(m + 1, &bi)); 4964 bi[0] = 0; 4965 4966 /* create and initialize a linked list */ 4967 nlnk = N + 1; 4968 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4969 4970 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4971 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4972 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4973 4974 
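  /* lnk is a sorted linked list over the global columns [0,N); for each local row it merges the columns of
     this rank's seqmat with the columns received from other ranks, and the merged indices are accumulated
     in free_space before being copied contiguously into bj */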
current_space = free_space; 4975 4976 /* determine symbolic info for each local row */ 4977 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4978 4979 for (k = 0; k < merge->nrecv; k++) { 4980 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4981 nrows = *buf_ri_k[k]; 4982 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4983 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4984 } 4985 4986 MatPreallocateBegin(comm, m, n, dnz, onz); 4987 len = 0; 4988 for (i = 0; i < m; i++) { 4989 bnzi = 0; 4990 /* add local non-zero cols of this proc's seqmat into lnk */ 4991 arow = owners[rank] + i; 4992 anzi = ai[arow + 1] - ai[arow]; 4993 aj = a->j + ai[arow]; 4994 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4995 bnzi += nlnk; 4996 /* add received col data into lnk */ 4997 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4998 if (i == *nextrow[k]) { /* i-th row */ 4999 anzi = *(nextai[k] + 1) - *nextai[k]; 5000 aj = buf_rj[k] + *nextai[k]; 5001 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5002 bnzi += nlnk; 5003 nextrow[k]++; 5004 nextai[k]++; 5005 } 5006 } 5007 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5008 5009 /* if free space is not available, make more free space */ 5010 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5011 /* copy data into free space, then initialize lnk */ 5012 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5013 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5014 5015 current_space->array += bnzi; 5016 current_space->local_used += bnzi; 5017 current_space->local_remaining -= bnzi; 5018 5019 bi[i + 1] = bi[i] + bnzi; 5020 } 5021 5022 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5023 5024 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5025 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5026 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5027 5028 /* create symbolic parallel matrix B_mpi */ 5029 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5030 PetscCall(MatCreate(comm, &B_mpi)); 5031 if (n == PETSC_DECIDE) { 5032 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5033 } else { 5034 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5035 } 5036 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5037 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5038 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5039 MatPreallocateEnd(dnz, onz); 5040 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5041 5042 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5043 B_mpi->assembled = PETSC_FALSE; 5044 merge->bi = bi; 5045 merge->bj = bj; 5046 merge->buf_ri = buf_ri; 5047 merge->buf_rj = buf_rj; 5048 merge->coi = NULL; 5049 merge->coj = NULL; 5050 merge->owners_co = NULL; 5051 5052 PetscCall(PetscCommDestroy(&comm)); 5053 5054 /* attach the supporting struct to B_mpi for reuse */ 5055 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5056 PetscCall(PetscContainerSetPointer(container, merge)); 5057 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5058 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5059 PetscCall(PetscContainerDestroy(&container)); 5060 
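  /* the composed container now holds the only reference to merge, so it stays alive until B_mpi is destroyed,
     at which point MatDestroy_MPIAIJ_SeqsToMPI() frees it */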
*mpimat = B_mpi; 5061 5062 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5063 PetscFunctionReturn(PETSC_SUCCESS); 5064 } 5065 5066 /*@ 5067 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5068 matrices from each processor 5069 5070 Collective 5071 5072 Input Parameters: 5073 + comm - the communicators the parallel matrix will live on 5074 . seqmat - the input sequential matrices 5075 . m - number of local rows (or `PETSC_DECIDE`) 5076 . n - number of local columns (or `PETSC_DECIDE`) 5077 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5078 5079 Output Parameter: 5080 . mpimat - the parallel matrix generated 5081 5082 Level: advanced 5083 5084 Note: 5085 The dimensions of the sequential matrix in each processor MUST be the same. 5086 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5087 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5088 5089 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5090 @*/ 5091 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5092 { 5093 PetscMPIInt size; 5094 5095 PetscFunctionBegin; 5096 PetscCallMPI(MPI_Comm_size(comm, &size)); 5097 if (size == 1) { 5098 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5099 if (scall == MAT_INITIAL_MATRIX) { 5100 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5101 } else { 5102 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5103 } 5104 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5105 PetscFunctionReturn(PETSC_SUCCESS); 5106 } 5107 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5108 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5109 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5110 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5111 PetscFunctionReturn(PETSC_SUCCESS); 5112 } 5113 5114 /*@ 5115 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5116 5117 Not Collective 5118 5119 Input Parameter: 5120 . A - the matrix 5121 5122 Output Parameter: 5123 . A_loc - the local sequential matrix generated 5124 5125 Level: developer 5126 5127 Notes: 5128 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5129 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5130 `n` is the global column count obtained with `MatGetSize()` 5131 5132 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5133 5134 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5135 5136 Destroy the matrix with `MatDestroy()` 5137 5138 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5139 @*/ 5140 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5141 { 5142 PetscBool mpi; 5143 5144 PetscFunctionBegin; 5145 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5146 if (mpi) { 5147 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5148 } else { 5149 *A_loc = A; 5150 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5151 } 5152 PetscFunctionReturn(PETSC_SUCCESS); 5153 } 5154 5155 /*@ 5156 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 
5157 5158 Not Collective 5159 5160 Input Parameters: 5161 + A - the matrix 5162 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5163 5164 Output Parameter: 5165 . A_loc - the local sequential matrix generated 5166 5167 Level: developer 5168 5169 Notes: 5170 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5171 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5172 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5173 5174 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5175 5176 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5177 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5178 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5179 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5180 5181 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5182 @*/ 5183 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5184 { 5185 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5186 Mat_SeqAIJ *mat, *a, *b; 5187 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5188 const PetscScalar *aa, *ba, *aav, *bav; 5189 PetscScalar *ca, *cam; 5190 PetscMPIInt size; 5191 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5192 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5193 PetscBool match; 5194 5195 PetscFunctionBegin; 5196 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5197 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5198 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5199 if (size == 1) { 5200 if (scall == MAT_INITIAL_MATRIX) { 5201 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5202 *A_loc = mpimat->A; 5203 } else if (scall == MAT_REUSE_MATRIX) { 5204 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5205 } 5206 PetscFunctionReturn(PETSC_SUCCESS); 5207 } 5208 5209 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5210 a = (Mat_SeqAIJ *)mpimat->A->data; 5211 b = (Mat_SeqAIJ *)mpimat->B->data; 5212 ai = a->i; 5213 aj = a->j; 5214 bi = b->i; 5215 bj = b->j; 5216 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5217 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5218 aa = aav; 5219 ba = bav; 5220 if (scall == MAT_INITIAL_MATRIX) { 5221 PetscCall(PetscMalloc1(1 + am, &ci)); 5222 ci[0] = 0; 5223 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5224 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5225 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5226 k = 0; 5227 for (i = 0; i < am; i++) { 5228 ncols_o = bi[i + 1] - bi[i]; 5229 ncols_d = ai[i + 1] - ai[i]; 5230 /* off-diagonal portion of A */ 5231 for (jo = 0; jo < ncols_o; jo++) { 5232 col = cmap[*bj]; 5233 if (col >= cstart) break; 5234 cj[k] = col; 5235 bj++; 5236 ca[k++] = *ba++; 5237 } 5238 /* diagonal portion of A */ 5239 for (j = 0; j < ncols_d; j++) { 5240 cj[k] = cstart + *aj++; 5241 ca[k++] = 
*aa++; 5242 } 5243 /* off-diagonal portion of A */ 5244 for (j = jo; j < ncols_o; j++) { 5245 cj[k] = cmap[*bj++]; 5246 ca[k++] = *ba++; 5247 } 5248 } 5249 /* put together the new matrix */ 5250 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5251 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5252 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5253 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5254 mat->free_a = PETSC_TRUE; 5255 mat->free_ij = PETSC_TRUE; 5256 mat->nonew = 0; 5257 } else if (scall == MAT_REUSE_MATRIX) { 5258 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5259 ci = mat->i; 5260 cj = mat->j; 5261 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5262 for (i = 0; i < am; i++) { 5263 /* off-diagonal portion of A */ 5264 ncols_o = bi[i + 1] - bi[i]; 5265 for (jo = 0; jo < ncols_o; jo++) { 5266 col = cmap[*bj]; 5267 if (col >= cstart) break; 5268 *cam++ = *ba++; 5269 bj++; 5270 } 5271 /* diagonal portion of A */ 5272 ncols_d = ai[i + 1] - ai[i]; 5273 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5274 /* off-diagonal portion of A */ 5275 for (j = jo; j < ncols_o; j++) { 5276 *cam++ = *ba++; 5277 bj++; 5278 } 5279 } 5280 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5281 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5282 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5283 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5284 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5285 PetscFunctionReturn(PETSC_SUCCESS); 5286 } 5287 5288 /*@ 5289 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5290 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5291 5292 Not Collective 5293 5294 Input Parameters: 5295 + A - the matrix 5296 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5297 5298 Output Parameters: 5299 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5300 - A_loc - the local sequential matrix generated 5301 5302 Level: developer 5303 5304 Note: 5305 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5306 part, then those associated with the off-diagonal part (in its local ordering) 5307 5308 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5309 @*/ 5310 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5311 { 5312 Mat Ao, Ad; 5313 const PetscInt *cmap; 5314 PetscMPIInt size; 5315 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5316 5317 PetscFunctionBegin; 5318 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5319 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5320 if (size == 1) { 5321 if (scall == MAT_INITIAL_MATRIX) { 5322 PetscCall(PetscObjectReference((PetscObject)Ad)); 5323 *A_loc = Ad; 5324 } else if (scall == MAT_REUSE_MATRIX) { 5325 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5326 } 5327 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5328 PetscFunctionReturn(PETSC_SUCCESS); 5329 } 5330 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5331 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5332 if (f) { 5333 PetscCall((*f)(A, scall, glob, A_loc)); 5334 } else { 5335 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5336 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5337 Mat_SeqAIJ *c; 5338 PetscInt *ai = a->i, *aj = a->j; 5339 PetscInt *bi = b->i, *bj = b->j; 5340 PetscInt *ci, *cj; 5341 const PetscScalar *aa, *ba; 5342 PetscScalar *ca; 5343 PetscInt i, j, am, dn, on; 5344 5345 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5346 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5347 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5348 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5349 if (scall == MAT_INITIAL_MATRIX) { 5350 PetscInt k; 5351 PetscCall(PetscMalloc1(1 + am, &ci)); 5352 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5353 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5354 ci[0] = 0; 5355 for (i = 0, k = 0; i < am; i++) { 5356 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5357 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5358 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5359 /* diagonal portion of A */ 5360 for (j = 0; j < ncols_d; j++, k++) { 5361 cj[k] = *aj++; 5362 ca[k] = *aa++; 5363 } 5364 /* off-diagonal portion of A */ 5365 for (j = 0; j < ncols_o; j++, k++) { 5366 cj[k] = dn + *bj++; 5367 ca[k] = *ba++; 5368 } 5369 } 5370 /* put together the new matrix */ 5371 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5372 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5373 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5374 c = (Mat_SeqAIJ *)(*A_loc)->data; 5375 c->free_a = PETSC_TRUE; 5376 c->free_ij = PETSC_TRUE; 5377 c->nonew = 0; 5378 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5379 } else if (scall == MAT_REUSE_MATRIX) { 5380 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5381 for (i = 0; i < am; i++) { 5382 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5383 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5384 /* diagonal portion of A */ 5385 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5386 /* off-diagonal portion of A */ 5387 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5388 } 5389 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5390 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5391 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5392 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5393 if (glob) { 5394 PetscInt cst, *gidx; 5395 5396 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5397 PetscCall(PetscMalloc1(dn + on, &gidx)); 5398 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5399 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5400 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5401 } 5402 } 5403 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5404 PetscFunctionReturn(PETSC_SUCCESS); 5405 } 5406 5407 /*@C 5408 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5409 5410 Not Collective 5411 5412 Input Parameters: 5413 + A - the matrix 5414 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5415 . row - index set of rows to extract (or `NULL`) 5416 - col - index set of columns to extract (or `NULL`) 5417 5418 Output Parameter: 5419 . A_loc - the local sequential matrix generated 5420 5421 Level: developer 5422 5423 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5424 @*/ 5425 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5426 { 5427 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5428 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5429 IS isrowa, iscola; 5430 Mat *aloc; 5431 PetscBool match; 5432 5433 PetscFunctionBegin; 5434 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5435 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5436 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5437 if (!row) { 5438 start = A->rmap->rstart; 5439 end = A->rmap->rend; 5440 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5441 } else { 5442 isrowa = *row; 5443 } 5444 if (!col) { 5445 start = A->cmap->rstart; 5446 cmap = a->garray; 5447 nzA = a->A->cmap->n; 5448 nzB = a->B->cmap->n; 5449 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5450 ncols = 0; 5451 for (i = 0; i < nzB; i++) { 5452 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5453 else break; 5454 } 5455 imark = i; 5456 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5457 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5458 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5459 } else { 5460 iscola = *col; 5461 } 5462 if (scall != MAT_INITIAL_MATRIX) { 5463 PetscCall(PetscMalloc1(1, &aloc)); 5464 aloc[0] = *A_loc; 5465 } 5466 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5467 if (!col) { /* attach global id of condensed columns */ 5468 
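    /* iscola was built above from the owned columns plus the nonzero off-process columns (a->garray);
       composing it with the submatrix lets callers recover which global columns were kept */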
PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5469 } 5470 *A_loc = aloc[0]; 5471 PetscCall(PetscFree(aloc)); 5472 if (!row) PetscCall(ISDestroy(&isrowa)); 5473 if (!col) PetscCall(ISDestroy(&iscola)); 5474 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5475 PetscFunctionReturn(PETSC_SUCCESS); 5476 } 5477 5478 /* 5479 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5480 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5481 * on a global size. 5482 * */ 5483 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5484 { 5485 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5486 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5487 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5488 PetscMPIInt owner; 5489 PetscSFNode *iremote, *oiremote; 5490 const PetscInt *lrowindices; 5491 PetscSF sf, osf; 5492 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5493 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5494 MPI_Comm comm; 5495 ISLocalToGlobalMapping mapping; 5496 const PetscScalar *pd_a, *po_a; 5497 5498 PetscFunctionBegin; 5499 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5500 /* plocalsize is the number of roots 5501 * nrows is the number of leaves 5502 * */ 5503 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5504 PetscCall(ISGetLocalSize(rows, &nrows)); 5505 PetscCall(PetscCalloc1(nrows, &iremote)); 5506 PetscCall(ISGetIndices(rows, &lrowindices)); 5507 for (i = 0; i < nrows; i++) { 5508 /* Find a remote index and an owner for a row 5509 * The row could be local or remote 5510 * */ 5511 owner = 0; 5512 lidx = 0; 5513 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5514 iremote[i].index = lidx; 5515 iremote[i].rank = owner; 5516 } 5517 /* Create SF to communicate how many nonzero columns for each row */ 5518 PetscCall(PetscSFCreate(comm, &sf)); 5519 /* SF will figure out the number of nonzero columns for each row, and their 5520 * offsets 5521 * */ 5522 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5523 PetscCall(PetscSFSetFromOptions(sf)); 5524 PetscCall(PetscSFSetUp(sf)); 5525 5526 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5527 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5528 PetscCall(PetscCalloc1(nrows, &pnnz)); 5529 roffsets[0] = 0; 5530 roffsets[1] = 0; 5531 for (i = 0; i < plocalsize; i++) { 5532 /* diagonal */ 5533 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5534 /* off-diagonal */ 5535 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5536 /* compute offsets so that we relative location for each row */ 5537 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5538 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5539 } 5540 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5541 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5542 /* 'r' means root, and 'l' means leaf */ 5543 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5544 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5545 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5546 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5547 PetscCall(PetscSFDestroy(&sf)); 5548 
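  /* after the two broadcasts, nlcols[2*i] and nlcols[2*i+1] hold the number of diagonal and off-diagonal
     nonzeros of requested row i, and loffsets[2*i], loffsets[2*i+1] their starting offsets in P's local
     arrays; the counting SF is no longer needed */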
PetscCall(PetscFree(roffsets)); 5549 PetscCall(PetscFree(nrcols)); 5550 dntotalcols = 0; 5551 ontotalcols = 0; 5552 ncol = 0; 5553 for (i = 0; i < nrows; i++) { 5554 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5555 ncol = PetscMax(pnnz[i], ncol); 5556 /* diagonal */ 5557 dntotalcols += nlcols[i * 2 + 0]; 5558 /* off-diagonal */ 5559 ontotalcols += nlcols[i * 2 + 1]; 5560 } 5561 /* We do not need to figure the right number of columns 5562 * since all the calculations will be done by going through the raw data 5563 * */ 5564 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5565 PetscCall(MatSetUp(*P_oth)); 5566 PetscCall(PetscFree(pnnz)); 5567 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5568 /* diagonal */ 5569 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5570 /* off-diagonal */ 5571 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5572 /* diagonal */ 5573 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5574 /* off-diagonal */ 5575 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5576 dntotalcols = 0; 5577 ontotalcols = 0; 5578 ntotalcols = 0; 5579 for (i = 0; i < nrows; i++) { 5580 owner = 0; 5581 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5582 /* Set iremote for diag matrix */ 5583 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5584 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5585 iremote[dntotalcols].rank = owner; 5586 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5587 ilocal[dntotalcols++] = ntotalcols++; 5588 } 5589 /* off-diagonal */ 5590 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5591 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5592 oiremote[ontotalcols].rank = owner; 5593 oilocal[ontotalcols++] = ntotalcols++; 5594 } 5595 } 5596 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5597 PetscCall(PetscFree(loffsets)); 5598 PetscCall(PetscFree(nlcols)); 5599 PetscCall(PetscSFCreate(comm, &sf)); 5600 /* P serves as roots and P_oth is leaves 5601 * Diag matrix 5602 * */ 5603 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5604 PetscCall(PetscSFSetFromOptions(sf)); 5605 PetscCall(PetscSFSetUp(sf)); 5606 5607 PetscCall(PetscSFCreate(comm, &osf)); 5608 /* off-diagonal */ 5609 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5610 PetscCall(PetscSFSetFromOptions(osf)); 5611 PetscCall(PetscSFSetUp(osf)); 5612 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5613 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5614 /* operate on the matrix internal data to save memory */ 5615 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5616 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5617 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5618 /* Convert to global indices for diag matrix */ 5619 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5620 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5621 /* We want P_oth store global indices */ 5622 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5623 /* Use memory scalable approach */ 5624 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5625 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5626 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5627 
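  /* complete the broadcast of the (temporarily globalized) diagonal column indices started above, then shift
     pd->j back to local numbering; the off-diagonal broadcast through osf is still in flight and finishes below */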
PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5628 /* Convert back to local indices */ 5629 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5630 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5631 nout = 0; 5632 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5633 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5634 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5635 /* Exchange values */ 5636 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5637 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5638 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5639 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5640 /* Stop PETSc from shrinking memory */ 5641 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5642 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5643 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5644 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5645 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5646 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5647 PetscCall(PetscSFDestroy(&sf)); 5648 PetscCall(PetscSFDestroy(&osf)); 5649 PetscFunctionReturn(PETSC_SUCCESS); 5650 } 5651 5652 /* 5653 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5654 * This supports MPIAIJ and MAIJ 5655 * */ 5656 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5657 { 5658 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5659 Mat_SeqAIJ *p_oth; 5660 IS rows, map; 5661 PetscHMapI hamp; 5662 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5663 MPI_Comm comm; 5664 PetscSF sf, osf; 5665 PetscBool has; 5666 5667 PetscFunctionBegin; 5668 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5669 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5670 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5671 * and then create a submatrix (that often is an overlapping matrix) 5672 * */ 5673 if (reuse == MAT_INITIAL_MATRIX) { 5674 /* Use a hash table to figure out unique keys */ 5675 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5676 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5677 count = 0; 5678 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5679 for (i = 0; i < a->B->cmap->n; i++) { 5680 key = a->garray[i] / dof; 5681 PetscCall(PetscHMapIHas(hamp, key, &has)); 5682 if (!has) { 5683 mapping[i] = count; 5684 PetscCall(PetscHMapISet(hamp, key, count++)); 5685 } else { 5686 /* Current 'i' has the same value the previous step */ 5687 mapping[i] = count - 1; 5688 } 5689 } 5690 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5691 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5692 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5693 PetscCall(PetscCalloc1(htsize, &rowindices)); 5694 off = 0; 5695 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5696 PetscCall(PetscHMapIDestroy(&hamp)); 5697 PetscCall(PetscSortInt(htsize, rowindices)); 5698 PetscCall(ISCreateGeneral(comm, htsize, 
rowindices, PETSC_OWN_POINTER, &rows)); 5699 /* In case, the matrix was already created but users want to recreate the matrix */ 5700 PetscCall(MatDestroy(P_oth)); 5701 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5702 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5703 PetscCall(ISDestroy(&map)); 5704 PetscCall(ISDestroy(&rows)); 5705 } else if (reuse == MAT_REUSE_MATRIX) { 5706 /* If matrix was already created, we simply update values using SF objects 5707 * that as attached to the matrix earlier. 5708 */ 5709 const PetscScalar *pd_a, *po_a; 5710 5711 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5712 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5713 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5714 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5715 /* Update values in place */ 5716 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5717 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5718 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5719 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5720 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5721 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5722 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5723 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5724 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5725 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5726 PetscFunctionReturn(PETSC_SUCCESS); 5727 } 5728 5729 /*@C 5730 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5731 5732 Collective 5733 5734 Input Parameters: 5735 + A - the first matrix in `MATMPIAIJ` format 5736 . B - the second matrix in `MATMPIAIJ` format 5737 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5738 5739 Output Parameters: 5740 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5741 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5742 - B_seq - the sequential matrix generated 5743 5744 Level: developer 5745 5746 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5747 @*/ 5748 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5749 { 5750 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5751 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5752 IS isrowb, iscolb; 5753 Mat *bseq = NULL; 5754 5755 PetscFunctionBegin; 5756 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5757 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5758 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5759 5760 if (scall == MAT_INITIAL_MATRIX) { 5761 start = A->cmap->rstart; 5762 cmap = a->garray; 5763 nzA = a->A->cmap->n; 5764 nzB = a->B->cmap->n; 5765 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5766 ncols = 0; 5767 for (i = 0; i < nzB; i++) { /* row < local row index */ 5768 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5769 else break; 5770 } 5771 imark = i; 5772 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5773 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5774 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5775 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5776 } else { 5777 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5778 isrowb = *rowb; 5779 iscolb = *colb; 5780 PetscCall(PetscMalloc1(1, &bseq)); 5781 bseq[0] = *B_seq; 5782 } 5783 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5784 *B_seq = bseq[0]; 5785 PetscCall(PetscFree(bseq)); 5786 if (!rowb) { 5787 PetscCall(ISDestroy(&isrowb)); 5788 } else { 5789 *rowb = isrowb; 5790 } 5791 if (!colb) { 5792 PetscCall(ISDestroy(&iscolb)); 5793 } else { 5794 *colb = iscolb; 5795 } 5796 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5797 PetscFunctionReturn(PETSC_SUCCESS); 5798 } 5799 5800 /* 5801 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5802 of the OFF-DIAGONAL portion of local A 5803 5804 Collective 5805 5806 Input Parameters: 5807 + A,B - the matrices in `MATMPIAIJ` format 5808 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5809 5810 Output Parameter: 5811 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5812 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5813 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5814 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5815 5816 Developer Note: 5817 This directly accesses information inside the VecScatter associated with the matrix-vector product 5818 for this matrix. This is not desirable.. 
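  The send and receive lists of that VecScatter determine which rows of B each rank needs. With MAT_INITIAL_MATRIX the
  row lengths, column indices, and values of those rows are exchanged with nonblocking point-to-point messages; with
  MAT_REUSE_MATRIX only the values are re-sent, reusing the startsj_s, startsj_r, and bufa_ptr arrays saved from the first call.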
5819 5820 Level: developer 5821 5822 */ 5823 5824 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5825 { 5826 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5827 VecScatter ctx; 5828 MPI_Comm comm; 5829 const PetscMPIInt *rprocs, *sprocs; 5830 PetscMPIInt nrecvs, nsends; 5831 const PetscInt *srow, *rstarts, *sstarts; 5832 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5833 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5834 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5835 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5836 PetscMPIInt size, tag, rank, nreqs; 5837 5838 PetscFunctionBegin; 5839 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5840 PetscCallMPI(MPI_Comm_size(comm, &size)); 5841 5842 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5843 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5844 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5845 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5846 5847 if (size == 1) { 5848 startsj_s = NULL; 5849 bufa_ptr = NULL; 5850 *B_oth = NULL; 5851 PetscFunctionReturn(PETSC_SUCCESS); 5852 } 5853 5854 ctx = a->Mvctx; 5855 tag = ((PetscObject)ctx)->tag; 5856 5857 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5858 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5859 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5860 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5861 PetscCall(PetscMalloc1(nreqs, &reqs)); 5862 rwaits = reqs; 5863 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5864 5865 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5866 if (scall == MAT_INITIAL_MATRIX) { 5867 /* i-array */ 5868 /* post receives */ 5869 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5870 for (i = 0; i < nrecvs; i++) { 5871 rowlen = rvalues + rstarts[i] * rbs; 5872 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5873 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5874 } 5875 5876 /* pack the outgoing message */ 5877 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5878 5879 sstartsj[0] = 0; 5880 rstartsj[0] = 0; 5881 len = 0; /* total length of j or a array to be sent */ 5882 if (nsends) { 5883 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5884 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5885 } 5886 for (i = 0; i < nsends; i++) { 5887 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5888 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5889 for (j = 0; j < nrows; j++) { 5890 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5891 for (l = 0; l < sbs; l++) { 5892 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5893 5894 rowlen[j * sbs + l] = ncols; 5895 5896 len += ncols; 5897 
PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5898 } 5899 k++; 5900 } 5901 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5902 5903 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5904 } 5905 /* recvs and sends of i-array are completed */ 5906 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5907 PetscCall(PetscFree(svalues)); 5908 5909 /* allocate buffers for sending j and a arrays */ 5910 PetscCall(PetscMalloc1(len + 1, &bufj)); 5911 PetscCall(PetscMalloc1(len + 1, &bufa)); 5912 5913 /* create i-array of B_oth */ 5914 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5915 5916 b_othi[0] = 0; 5917 len = 0; /* total length of j or a array to be received */ 5918 k = 0; 5919 for (i = 0; i < nrecvs; i++) { 5920 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5921 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5922 for (j = 0; j < nrows; j++) { 5923 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5924 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5925 k++; 5926 } 5927 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5928 } 5929 PetscCall(PetscFree(rvalues)); 5930 5931 /* allocate space for j and a arrays of B_oth */ 5932 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5933 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5934 5935 /* j-array */ 5936 /* post receives of j-array */ 5937 for (i = 0; i < nrecvs; i++) { 5938 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5939 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5940 } 5941 5942 /* pack the outgoing message j-array */ 5943 if (nsends) k = sstarts[0]; 5944 for (i = 0; i < nsends; i++) { 5945 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5946 bufJ = bufj + sstartsj[i]; 5947 for (j = 0; j < nrows; j++) { 5948 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5949 for (ll = 0; ll < sbs; ll++) { 5950 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5951 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5952 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5953 } 5954 } 5955 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5956 } 5957 5958 /* recvs and sends of j-array are completed */ 5959 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5960 } else if (scall == MAT_REUSE_MATRIX) { 5961 sstartsj = *startsj_s; 5962 rstartsj = *startsj_r; 5963 bufa = *bufa_ptr; 5964 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5965 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5966 5967 /* a-array */ 5968 /* post receives of a-array */ 5969 for (i = 0; i < nrecvs; i++) { 5970 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5971 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5972 } 5973 5974 /* pack the outgoing message a-array */ 5975 if (nsends) k = sstarts[0]; 5976 for (i = 0; i < nsends; i++) { 5977 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5978 bufA = bufa + sstartsj[i]; 5979 for (j = 0; j < nrows; j++) { 5980 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5981 for (ll = 0; ll < sbs; ll++) { 5982 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5983 for (l = 
0; l < ncols; l++) *bufA++ = vals[l]; 5984 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5985 } 5986 } 5987 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5988 } 5989 /* recvs and sends of a-array are completed */ 5990 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5991 PetscCall(PetscFree(reqs)); 5992 5993 if (scall == MAT_INITIAL_MATRIX) { 5994 Mat_SeqAIJ *b_oth; 5995 5996 /* put together the new matrix */ 5997 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5998 5999 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6000 /* Since these are PETSc arrays, change flags to free them as necessary. */ 6001 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6002 b_oth->free_a = PETSC_TRUE; 6003 b_oth->free_ij = PETSC_TRUE; 6004 b_oth->nonew = 0; 6005 6006 PetscCall(PetscFree(bufj)); 6007 if (!startsj_s || !bufa_ptr) { 6008 PetscCall(PetscFree2(sstartsj, rstartsj)); 6009 PetscCall(PetscFree(bufa_ptr)); 6010 } else { 6011 *startsj_s = sstartsj; 6012 *startsj_r = rstartsj; 6013 *bufa_ptr = bufa; 6014 } 6015 } else if (scall == MAT_REUSE_MATRIX) { 6016 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6017 } 6018 6019 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6020 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6021 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6022 PetscFunctionReturn(PETSC_SUCCESS); 6023 } 6024 6025 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6028 #if defined(PETSC_HAVE_MKL_SPARSE) 6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6030 #endif 6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6033 #if defined(PETSC_HAVE_ELEMENTAL) 6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6035 #endif 6036 #if defined(PETSC_HAVE_SCALAPACK) 6037 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6038 #endif 6039 #if defined(PETSC_HAVE_HYPRE) 6040 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6041 #endif 6042 #if defined(PETSC_HAVE_CUDA) 6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6044 #endif 6045 #if defined(PETSC_HAVE_HIP) 6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6047 #endif 6048 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6050 #endif 6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6052 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6053 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6054 6055 /* 6056 Computes (B'*A')' since computing B*A directly is untenable 6057 6058 n p p 6059 [ ] [ ] [ ] 6060 m [ A ] * n [ B ] = m [ C ] 6061 [ ] [ ] [ ] 6062 
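  i.e., for an MPIDENSE A and an MPIAIJ B, C = A*B is formed by explicitly transposing both operands, computing Ct = Bt*At
  with MatMatMult(), and transposing the result back into C.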
6063 */ 6064 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6065 { 6066 Mat At, Bt, Ct; 6067 6068 PetscFunctionBegin; 6069 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6070 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6071 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6072 PetscCall(MatDestroy(&At)); 6073 PetscCall(MatDestroy(&Bt)); 6074 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6075 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6076 PetscCall(MatDestroy(&Ct)); 6077 PetscFunctionReturn(PETSC_SUCCESS); 6078 } 6079 6080 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6081 { 6082 PetscBool cisdense; 6083 6084 PetscFunctionBegin; 6085 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6086 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6087 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6088 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6089 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6090 PetscCall(MatSetUp(C)); 6091 6092 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6093 PetscFunctionReturn(PETSC_SUCCESS); 6094 } 6095 6096 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6097 { 6098 Mat_Product *product = C->product; 6099 Mat A = product->A, B = product->B; 6100 6101 PetscFunctionBegin; 6102 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6103 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6104 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6105 C->ops->productsymbolic = MatProductSymbolic_AB; 6106 PetscFunctionReturn(PETSC_SUCCESS); 6107 } 6108 6109 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6110 { 6111 Mat_Product *product = C->product; 6112 6113 PetscFunctionBegin; 6114 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6115 PetscFunctionReturn(PETSC_SUCCESS); 6116 } 6117 6118 /* 6119 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6120 6121 Input Parameters: 6122 6123 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6124 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6125 6126 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6127 6128 For Set1, j1[] contains column indices of the nonzeros. 6129 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6130 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6131 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6132 6133 Similar for Set2. 6134 6135 This routine merges the two sets of nonzeros row by row and removes repeats. 6136 6137 Output Parameters: (memory is allocated by the caller) 6138 6139 i[],j[]: the CSR of the merged matrix, which has m rows. 6140 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) 
corresponds to imap1[k]-th unique nonzero in the merged matrix.
  imap2[]: similar to imap1[], but for Set2.
  Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* r: row index; m: number of local rows of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero of Set1 */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero of Set2 */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    PetscCall(PetscIntCast(t, i + r + 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine sorts j[] within each row and permutes perm[] accordingly.
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block.
Length of Aperm[] is Atot, though it may also count 6223 repeats (i.e., same 'i,j' pair). 6224 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6225 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6226 6227 Atot: number of entries belonging to the diagonal block 6228 Annz: number of unique nonzeros belonging to the diagonal block. 6229 6230 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6231 6232 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6233 */ 6234 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6235 { 6236 PetscInt cstart, cend, rstart, rend, row, col; 6237 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6238 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6239 PetscCount k, m, p, q, r, s, mid; 6240 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6241 6242 PetscFunctionBegin; 6243 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6244 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6245 m = rend - rstart; 6246 6247 /* Skip negative rows */ 6248 for (k = 0; k < n; k++) 6249 if (i[k] >= 0) break; 6250 6251 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6252 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6253 */ 6254 while (k < n) { 6255 row = i[k]; 6256 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6257 for (s = k; s < n; s++) 6258 if (i[s] != row) break; 6259 6260 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6261 for (p = k; p < s; p++) { 6262 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6263 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6264 } 6265 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6266 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6267 rowBegin[row - rstart] = k; 6268 rowMid[row - rstart] = mid; 6269 rowEnd[row - rstart] = s; 6270 6271 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6272 Atot += mid - k; 6273 Btot += s - mid; 6274 6275 /* Count unique nonzeros of this diag row */ 6276 for (p = k; p < mid;) { 6277 col = j[p]; 6278 do { 6279 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6280 p++; 6281 } while (p < mid && j[p] == col); 6282 Annz++; 6283 } 6284 6285 /* Count unique nonzeros of this offdiag row */ 6286 for (p = mid; p < s;) { 6287 col = j[p]; 6288 do { 6289 p++; 6290 } while (p < s && j[p] == col); 6291 Bnnz++; 6292 } 6293 k = s; 6294 } 6295 6296 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6297 PetscCall(PetscMalloc1(Atot, &Aperm)); 6298 PetscCall(PetscMalloc1(Btot, &Bperm)); 6299 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6300 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6301 6302 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6303 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6304 for (r = 0; r < m; r++) { 6305 k = rowBegin[r]; 6306 mid = rowMid[r]; 6307 s = rowEnd[r]; 6308 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6309 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6310 Atot += mid - k; 6311 Btot += s - mid; 6312 6313 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6314 for (p = k; p < mid;) { 6315 col = j[p]; 6316 q = p; 6317 do { 6318 p++; 6319 } while (p < mid && j[p] == col); 6320 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6321 Annz++; 6322 } 6323 6324 for (p = mid; p < s;) { 6325 col = j[p]; 6326 q = p; 6327 do { 6328 p++; 6329 } while (p < s && j[p] == col); 6330 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6331 Bnnz++; 6332 } 6333 } 6334 /* Output */ 6335 *Aperm_ = Aperm; 6336 *Annz_ = Annz; 6337 *Atot_ = Atot; 6338 *Ajmap_ = Ajmap; 6339 *Bperm_ = Bperm; 6340 *Bnnz_ = Bnnz; 6341 *Btot_ = Btot; 6342 *Bjmap_ = Bjmap; 6343 PetscFunctionReturn(PETSC_SUCCESS); 6344 } 6345 6346 /* 6347 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6348 6349 Input Parameters: 6350 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6351 nnz: number of unique nonzeros in the merged matrix 6352 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6353 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6354 6355 Output Parameter: (memory is allocated by the caller) 6356 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6357 6358 Example: 6359 nnz1 = 4 6360 nnz = 6 6361 imap = [1,3,4,5] 6362 
jmap = [0,3,5,6,7] 6363 then, 6364 jmap_new = [0,0,3,3,5,6,7] 6365 */ 6366 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6367 { 6368 PetscCount k, p; 6369 6370 PetscFunctionBegin; 6371 jmap_new[0] = 0; 6372 p = nnz; /* p loops over jmap_new[] backwards */ 6373 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6374 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6375 } 6376 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6377 PetscFunctionReturn(PETSC_SUCCESS); 6378 } 6379 6380 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6381 { 6382 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6383 6384 PetscFunctionBegin; 6385 PetscCall(PetscSFDestroy(&coo->sf)); 6386 PetscCall(PetscFree(coo->Aperm1)); 6387 PetscCall(PetscFree(coo->Bperm1)); 6388 PetscCall(PetscFree(coo->Ajmap1)); 6389 PetscCall(PetscFree(coo->Bjmap1)); 6390 PetscCall(PetscFree(coo->Aimap2)); 6391 PetscCall(PetscFree(coo->Bimap2)); 6392 PetscCall(PetscFree(coo->Aperm2)); 6393 PetscCall(PetscFree(coo->Bperm2)); 6394 PetscCall(PetscFree(coo->Ajmap2)); 6395 PetscCall(PetscFree(coo->Bjmap2)); 6396 PetscCall(PetscFree(coo->Cperm1)); 6397 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6398 PetscCall(PetscFree(coo)); 6399 PetscFunctionReturn(PETSC_SUCCESS); 6400 } 6401 6402 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6403 { 6404 MPI_Comm comm; 6405 PetscMPIInt rank, size; 6406 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6407 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6408 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6409 PetscContainer container; 6410 MatCOOStruct_MPIAIJ *coo; 6411 6412 PetscFunctionBegin; 6413 PetscCall(PetscFree(mpiaij->garray)); 6414 PetscCall(VecDestroy(&mpiaij->lvec)); 6415 #if defined(PETSC_USE_CTABLE) 6416 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6417 #else 6418 PetscCall(PetscFree(mpiaij->colmap)); 6419 #endif 6420 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6421 mat->assembled = PETSC_FALSE; 6422 mat->was_assembled = PETSC_FALSE; 6423 6424 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6425 PetscCallMPI(MPI_Comm_size(comm, &size)); 6426 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6427 PetscCall(PetscLayoutSetUp(mat->rmap)); 6428 PetscCall(PetscLayoutSetUp(mat->cmap)); 6429 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6430 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6431 PetscCall(MatGetLocalSize(mat, &m, &n)); 6432 PetscCall(MatGetSize(mat, &M, &N)); 6433 6434 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6435 /* entries come first, then local rows, then remote rows. */ 6436 PetscCount n1 = coo_n, *perm1; 6437 PetscInt *i1 = coo_i, *j1 = coo_j; 6438 6439 PetscCall(PetscMalloc1(n1, &perm1)); 6440 for (k = 0; k < n1; k++) perm1[k] = k; 6441 6442 /* Manipulate indices so that entries with negative row or col indices will have smallest 6443 row indices, local entries will have greater but negative row indices, and remote entries 6444 will have positive row indices. 
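     For example, on a rank owning rows [rstart,rend) = [4,8): an entry with a negative row or column index gets row index
     PETSC_INT_MIN, an entry in local row 6 is temporarily stored with row index 6 - PETSC_INT_MAX (negative, but larger than
     PETSC_INT_MIN), and an entry in remote row 10 keeps the positive index 10. Sorting by row then orders the entries as
     ignored, local, remote.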
6445 */ 6446 for (k = 0; k < n1; k++) { 6447 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6448 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6449 else { 6450 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6451 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6452 } 6453 } 6454 6455 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6456 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6457 6458 /* Advance k to the first entry we need to take care of */ 6459 for (k = 0; k < n1; k++) 6460 if (i1[k] > PETSC_INT_MIN) break; 6461 PetscCount i1start = k; 6462 6463 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6464 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6465 6466 /* Send remote rows to their owner */ 6467 /* Find which rows should be sent to which remote ranks*/ 6468 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6469 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6470 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6471 const PetscInt *ranges; 6472 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6473 6474 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6475 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6476 for (k = rem; k < n1;) { 6477 PetscMPIInt owner; 6478 PetscInt firstRow, lastRow; 6479 6480 /* Locate a row range */ 6481 firstRow = i1[k]; /* first row of this owner */ 6482 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6483 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6484 6485 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6486 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6487 6488 /* All entries in [k,p) belong to this remote owner */ 6489 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6490 PetscMPIInt *sendto2; 6491 PetscInt *nentries2; 6492 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size;

      PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
      PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
      PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); /* copy the old counts into the new array */
      PetscCall(PetscFree2(sendto, nentries));                 /* free the old arrays, not the freshly allocated ones */
      sendto   = sendto2;
      nentries = nentries2;
      maxNsend = maxNsend2;
    }
    sendto[nsend] = owner;
    PetscCall(PetscIntCast(p - k, &nentries[nsend]));
    nsend++;
    k = p;
  }

  /* Build 1st SF to know offsets on remote to send data */
  PetscSF      sf1;
  PetscInt     nroots = 1, nroots2 = 0;
  PetscInt     nleaves = nsend, nleaves2 = 0;
  PetscInt    *offsets;
  PetscSFNode *iremote;

  PetscCall(PetscSFCreate(comm, &sf1));
  PetscCall(PetscMalloc1(nsend, &iremote));
  PetscCall(PetscMalloc1(nsend, &offsets));
  for (k = 0; k < nsend; k++) {
    iremote[k].rank  = sendto[k];
    iremote[k].index = 0;
    nleaves2 += nentries[k];
    PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
  }
  PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */
  PetscCall(PetscSFDestroy(&sf1));
  PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);

  /* Build 2nd SF to send remote COOs to their owner */
  PetscSF sf2;
  nroots  = nroots2;
  nleaves = nleaves2;
  PetscCall(PetscSFCreate(comm, &sf2));
  PetscCall(PetscSFSetFromOptions(sf2));
  PetscCall(PetscMalloc1(nleaves, &iremote));
  p = 0;
  for (k = 0; k < nsend; k++) {
    PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
    for (q = 0; q < nentries[k]; q++, p++) {
      iremote[p].rank = sendto[k];
      PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
    }
  }
  PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));

  /* Send the remote COOs to their owner */
  PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
  PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
  PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
  PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
  PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
  PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
  PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2,
MPIU_INT, j1prem, j2, MPI_REPLACE)); 6559 6560 PetscCall(PetscFree(offsets)); 6561 PetscCall(PetscFree2(sendto, nentries)); 6562 6563 /* Sort received COOs by row along with the permutation array */ 6564 for (k = 0; k < n2; k++) perm2[k] = k; 6565 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6566 6567 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6568 PetscCount *Cperm1; 6569 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6570 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6571 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6572 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6573 6574 /* Support for HYPRE matrices, kind of a hack. 6575 Swap min column with diagonal so that diagonal values will go first */ 6576 PetscBool hypre; 6577 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6578 if (hypre) { 6579 PetscInt *minj; 6580 PetscBT hasdiag; 6581 6582 PetscCall(PetscBTCreate(m, &hasdiag)); 6583 PetscCall(PetscMalloc1(m, &minj)); 6584 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6585 for (k = i1start; k < rem; k++) { 6586 if (j1[k] < cstart || j1[k] >= cend) continue; 6587 const PetscInt rindex = i1[k] - rstart; 6588 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6589 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6590 } 6591 for (k = 0; k < n2; k++) { 6592 if (j2[k] < cstart || j2[k] >= cend) continue; 6593 const PetscInt rindex = i2[k] - rstart; 6594 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6595 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6596 } 6597 for (k = i1start; k < rem; k++) { 6598 const PetscInt rindex = i1[k] - rstart; 6599 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6600 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6601 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6602 } 6603 for (k = 0; k < n2; k++) { 6604 const PetscInt rindex = i2[k] - rstart; 6605 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6606 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6607 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6608 } 6609 PetscCall(PetscBTDestroy(&hasdiag)); 6610 PetscCall(PetscFree(minj)); 6611 } 6612 6613 /* Split local COOs and received COOs into diag/offdiag portions */ 6614 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6615 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6616 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6617 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6618 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6619 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6620 6621 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6622 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6623 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6624 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6625 6626 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6627 PetscInt *Ai, *Bi; 6628 PetscInt *Aj, *Bj; 6629 6630 PetscCall(PetscMalloc1(m + 1, &Ai)); 6631 PetscCall(PetscMalloc1(m + 1, &Bi)); 6632 PetscCall(PetscMalloc1(Annz1 + 
Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6633 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6634 6635 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6636 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6637 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6638 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6639 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6640 6641 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6642 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6643 6644 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6645 /* expect nonzeros in A/B most likely have local contributing entries */ 6646 PetscInt Annz = Ai[m]; 6647 PetscInt Bnnz = Bi[m]; 6648 PetscCount *Ajmap1_new, *Bjmap1_new; 6649 6650 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6651 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6652 6653 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6654 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6655 6656 PetscCall(PetscFree(Aimap1)); 6657 PetscCall(PetscFree(Ajmap1)); 6658 PetscCall(PetscFree(Bimap1)); 6659 PetscCall(PetscFree(Bjmap1)); 6660 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6661 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6662 PetscCall(PetscFree(perm1)); 6663 PetscCall(PetscFree3(i2, j2, perm2)); 6664 6665 Ajmap1 = Ajmap1_new; 6666 Bjmap1 = Bjmap1_new; 6667 6668 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6669 if (Annz < Annz1 + Annz2) { 6670 PetscInt *Aj_new; 6671 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6672 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6673 PetscCall(PetscFree(Aj)); 6674 Aj = Aj_new; 6675 } 6676 6677 if (Bnnz < Bnnz1 + Bnnz2) { 6678 PetscInt *Bj_new; 6679 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6680 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6681 PetscCall(PetscFree(Bj)); 6682 Bj = Bj_new; 6683 } 6684 6685 /* Create new submatrices for on-process and off-process coupling */ 6686 PetscScalar *Aa, *Ba; 6687 MatType rtype; 6688 Mat_SeqAIJ *a, *b; 6689 PetscObjectState state; 6690 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6691 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6692 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6693 if (cstart) { 6694 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6695 } 6696 6697 PetscCall(MatGetRootType_Private(mat, &rtype)); 6698 6699 MatSeqXAIJGetOptions_Private(mpiaij->A); 6700 PetscCall(MatDestroy(&mpiaij->A)); 6701 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6702 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6703 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6704 6705 MatSeqXAIJGetOptions_Private(mpiaij->B); 6706 PetscCall(MatDestroy(&mpiaij->B)); 6707 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6708 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6709 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6710 6711 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6712 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6713 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6714 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, 
PetscObjectComm((PetscObject)mat))); 6715 6716 a = (Mat_SeqAIJ *)mpiaij->A->data; 6717 b = (Mat_SeqAIJ *)mpiaij->B->data; 6718 a->free_a = PETSC_TRUE; 6719 a->free_ij = PETSC_TRUE; 6720 b->free_a = PETSC_TRUE; 6721 b->free_ij = PETSC_TRUE; 6722 a->maxnz = a->nz; 6723 b->maxnz = b->nz; 6724 6725 /* conversion must happen AFTER multiply setup */ 6726 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6727 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6728 PetscCall(VecDestroy(&mpiaij->lvec)); 6729 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6730 6731 // Put the COO struct in a container and then attach that to the matrix 6732 PetscCall(PetscMalloc1(1, &coo)); 6733 coo->n = coo_n; 6734 coo->sf = sf2; 6735 coo->sendlen = nleaves; 6736 coo->recvlen = nroots; 6737 coo->Annz = Annz; 6738 coo->Bnnz = Bnnz; 6739 coo->Annz2 = Annz2; 6740 coo->Bnnz2 = Bnnz2; 6741 coo->Atot1 = Atot1; 6742 coo->Atot2 = Atot2; 6743 coo->Btot1 = Btot1; 6744 coo->Btot2 = Btot2; 6745 coo->Ajmap1 = Ajmap1; 6746 coo->Aperm1 = Aperm1; 6747 coo->Bjmap1 = Bjmap1; 6748 coo->Bperm1 = Bperm1; 6749 coo->Aimap2 = Aimap2; 6750 coo->Ajmap2 = Ajmap2; 6751 coo->Aperm2 = Aperm2; 6752 coo->Bimap2 = Bimap2; 6753 coo->Bjmap2 = Bjmap2; 6754 coo->Bperm2 = Bperm2; 6755 coo->Cperm1 = Cperm1; 6756 // Allocate in preallocation. If not used, it has zero cost on host 6757 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6758 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6759 PetscCall(PetscContainerSetPointer(container, coo)); 6760 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6761 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6762 PetscCall(PetscContainerDestroy(&container)); 6763 PetscFunctionReturn(PETSC_SUCCESS); 6764 } 6765 6766 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6767 { 6768 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6769 Mat A = mpiaij->A, B = mpiaij->B; 6770 PetscScalar *Aa, *Ba; 6771 PetscScalar *sendbuf, *recvbuf; 6772 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6773 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6774 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6775 const PetscCount *Cperm1; 6776 PetscContainer container; 6777 MatCOOStruct_MPIAIJ *coo; 6778 6779 PetscFunctionBegin; 6780 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6781 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6782 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6783 sendbuf = coo->sendbuf; 6784 recvbuf = coo->recvbuf; 6785 Ajmap1 = coo->Ajmap1; 6786 Ajmap2 = coo->Ajmap2; 6787 Aimap2 = coo->Aimap2; 6788 Bjmap1 = coo->Bjmap1; 6789 Bjmap2 = coo->Bjmap2; 6790 Bimap2 = coo->Bimap2; 6791 Aperm1 = coo->Aperm1; 6792 Aperm2 = coo->Aperm2; 6793 Bperm1 = coo->Bperm1; 6794 Bperm2 = coo->Bperm2; 6795 Cperm1 = coo->Cperm1; 6796 6797 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6798 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6799 6800 /* Pack entries to be sent to remote */ 6801 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6802 6803 /* Send remote entries to their owner and overlap the communication with local computation */ 6804 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, 
PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6805 /* Add local entries to A and B */ 6806 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6807 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6808 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6809 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6810 } 6811 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6812 PetscScalar sum = 0.0; 6813 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6814 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6815 } 6816 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6817 6818 /* Add received remote entries to A and B */ 6819 for (PetscCount i = 0; i < coo->Annz2; i++) { 6820 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6821 } 6822 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6823 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6824 } 6825 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6826 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6827 PetscFunctionReturn(PETSC_SUCCESS); 6828 } 6829 6830 /*MC 6831 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6832 6833 Options Database Keys: 6834 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6835 6836 Level: beginner 6837 6838 Notes: 6839 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6840 in this case the values associated with the rows and columns one passes in are set to zero 6841 in the matrix 6842 6843 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 6844 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6845 6846 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6847 M*/ 6848 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6849 { 6850 Mat_MPIAIJ *b; 6851 PetscMPIInt size; 6852 6853 PetscFunctionBegin; 6854 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6855 6856 PetscCall(PetscNew(&b)); 6857 B->data = (void *)b; 6858 B->ops[0] = MatOps_Values; 6859 B->assembled = PETSC_FALSE; 6860 B->insertmode = NOT_SET_VALUES; 6861 b->size = size; 6862 6863 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6864 6865 /* build cache for off array entries formed */ 6866 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6867 6868 b->donotstash = PETSC_FALSE; 6869 b->colmap = NULL; 6870 b->garray = NULL; 6871 b->roworiented = PETSC_TRUE; 6872 6873 /* stuff used for matrix vector multiply */ 6874 b->lvec = NULL; 6875 b->Mvctx = NULL; 6876 6877 /* stuff for MatGetRow() */ 6878 b->rowindices = NULL; 6879 b->rowvalues = NULL; 6880 b->getrowactive = PETSC_FALSE; 6881 6882 /* flexible pointer used in CUSPARSE classes */ 6883 b->spptr = NULL; 6884 6885 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6886 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6887 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6888 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6889 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6890 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6891 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6893 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6894 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6896 #if defined(PETSC_HAVE_CUDA) 6897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6898 #endif 6899 #if defined(PETSC_HAVE_HIP) 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6901 #endif 6902 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6904 #endif 6905 #if defined(PETSC_HAVE_MKL_SPARSE) 6906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6907 #endif 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6910 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6912 #if defined(PETSC_HAVE_ELEMENTAL) 6913 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6914 #endif 6915 #if defined(PETSC_HAVE_SCALAPACK) 6916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6917 #endif 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6920 #if defined(PETSC_HAVE_HYPRE) 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6923 #endif 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6928 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6929 PetscFunctionReturn(PETSC_SUCCESS); 6930 } 6931 6932 /*@ 6933 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6934 and "off-diagonal" part of the matrix in CSR format. 6935 6936 Collective 6937 6938 Input Parameters: 6939 + comm - MPI communicator 6940 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6941 . n - This value should be the same as the local size used in creating the 6942 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6943 calculated if `N` is given) For square matrices `n` is almost always `m`. 6944 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6945 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6946 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6947 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6948 . a - matrix values 6949 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6950 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6951 - oa - matrix values 6952 6953 Output Parameter: 6954 . mat - the matrix 6955 6956 Level: advanced 6957 6958 Notes: 6959 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6960 must free the arrays once the matrix has been destroyed and not before. 
6961 6962 The `i` and `j` indices are 0 based 6963 6964 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6965 6966 This sets local rows and cannot be used to set off-processor values. 6967 6968 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6969 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6970 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6971 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6972 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6973 communication if it is known that only local entries will be set. 6974 6975 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6976 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6977 @*/ 6978 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6979 { 6980 Mat_MPIAIJ *maij; 6981 6982 PetscFunctionBegin; 6983 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6984 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6985 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6986 PetscCall(MatCreate(comm, mat)); 6987 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6988 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6989 maij = (Mat_MPIAIJ *)(*mat)->data; 6990 6991 (*mat)->preallocated = PETSC_TRUE; 6992 6993 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6994 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6995 6996 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6997 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6998 6999 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7000 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7001 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7002 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7003 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7004 PetscFunctionReturn(PETSC_SUCCESS); 7005 } 7006 7007 typedef struct { 7008 Mat *mp; /* intermediate products */ 7009 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7010 PetscInt cp; /* number of intermediate products */ 7011 7012 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7013 PetscInt *startsj_s, *startsj_r; 7014 PetscScalar *bufa; 7015 Mat P_oth; 7016 7017 /* may take advantage of merging product->B */ 7018 Mat Bloc; /* B-local by merging diag and off-diag */ 7019 7020 /* cusparse does not have support to split between symbolic and numeric phases. 
7021 When api_user is true, we don't need to update the numerical values 7022 of the temporary storage */ 7023 PetscBool reusesym; 7024 7025 /* support for COO values insertion */ 7026 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7027 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7028 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7029 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 7030 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7031 PetscMemType mtype; 7032 7033 /* customization */ 7034 PetscBool abmerge; 7035 PetscBool P_oth_bind; 7036 } MatMatMPIAIJBACKEND; 7037 7038 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7039 { 7040 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7041 PetscInt i; 7042 7043 PetscFunctionBegin; 7044 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7045 PetscCall(PetscFree(mmdata->bufa)); 7046 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7047 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7048 PetscCall(MatDestroy(&mmdata->P_oth)); 7049 PetscCall(MatDestroy(&mmdata->Bloc)); 7050 PetscCall(PetscSFDestroy(&mmdata->sf)); 7051 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7052 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7053 PetscCall(PetscFree(mmdata->own[0])); 7054 PetscCall(PetscFree(mmdata->own)); 7055 PetscCall(PetscFree(mmdata->off[0])); 7056 PetscCall(PetscFree(mmdata->off)); 7057 PetscCall(PetscFree(mmdata)); 7058 PetscFunctionReturn(PETSC_SUCCESS); 7059 } 7060 7061 /* Copy selected n entries with indices in idx[] of A to v[]. 
7062 If idx is NULL, copy the whole data array of A to v[] 7063 */ 7064 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7065 { 7066 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7067 7068 PetscFunctionBegin; 7069 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7070 if (f) { 7071 PetscCall((*f)(A, n, idx, v)); 7072 } else { 7073 const PetscScalar *vv; 7074 7075 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7076 if (n && idx) { 7077 PetscScalar *w = v; 7078 const PetscInt *oi = idx; 7079 PetscInt j; 7080 7081 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7082 } else { 7083 PetscCall(PetscArraycpy(v, vv, n)); 7084 } 7085 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7086 } 7087 PetscFunctionReturn(PETSC_SUCCESS); 7088 } 7089 7090 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7091 { 7092 MatMatMPIAIJBACKEND *mmdata; 7093 PetscInt i, n_d, n_o; 7094 7095 PetscFunctionBegin; 7096 MatCheckProduct(C, 1); 7097 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7098 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7099 if (!mmdata->reusesym) { /* update temporary matrices */ 7100 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7101 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7102 } 7103 mmdata->reusesym = PETSC_FALSE; 7104 7105 for (i = 0; i < mmdata->cp; i++) { 7106 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7107 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7108 } 7109 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7110 PetscInt noff; 7111 7112 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7113 if (mmdata->mptmp[i]) continue; 7114 if (noff) { 7115 PetscInt nown; 7116 7117 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7118 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7119 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7120 n_o += noff; 7121 n_d += nown; 7122 } else { 7123 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7124 7125 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7126 n_d += mm->nz; 7127 } 7128 } 7129 if (mmdata->hasoffproc) { /* offprocess insertion */ 7130 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7131 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7132 } 7133 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7134 PetscFunctionReturn(PETSC_SUCCESS); 7135 } 7136 7137 /* Support for Pt * A, A * P, or Pt * A * P */ 7138 #define MAX_NUMBER_INTERMEDIATE 4 7139 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7140 { 7141 Mat_Product *product = C->product; 7142 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7143 Mat_MPIAIJ *a, *p; 7144 MatMatMPIAIJBACKEND *mmdata; 7145 ISLocalToGlobalMapping P_oth_l2g = NULL; 7146 IS glob = NULL; 7147 const char *prefix; 7148 char pprefix[256]; 7149 const PetscInt *globidx, *P_oth_idx; 7150 PetscInt i, j, cp, m, 
n, M, N, *coo_i, *coo_j; 7151 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7152 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7153 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7154 /* a base offset; type-2: sparse with a local to global map table */ 7155 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7156 7157 MatProductType ptype; 7158 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7159 PetscMPIInt size; 7160 7161 PetscFunctionBegin; 7162 MatCheckProduct(C, 1); 7163 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7164 ptype = product->type; 7165 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7166 ptype = MATPRODUCT_AB; 7167 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7168 } 7169 switch (ptype) { 7170 case MATPRODUCT_AB: 7171 A = product->A; 7172 P = product->B; 7173 m = A->rmap->n; 7174 n = P->cmap->n; 7175 M = A->rmap->N; 7176 N = P->cmap->N; 7177 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7178 break; 7179 case MATPRODUCT_AtB: 7180 P = product->A; 7181 A = product->B; 7182 m = P->cmap->n; 7183 n = A->cmap->n; 7184 M = P->cmap->N; 7185 N = A->cmap->N; 7186 hasoffproc = PETSC_TRUE; 7187 break; 7188 case MATPRODUCT_PtAP: 7189 A = product->A; 7190 P = product->B; 7191 m = P->cmap->n; 7192 n = P->cmap->n; 7193 M = P->cmap->N; 7194 N = P->cmap->N; 7195 hasoffproc = PETSC_TRUE; 7196 break; 7197 default: 7198 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7199 } 7200 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7201 if (size == 1) hasoffproc = PETSC_FALSE; 7202 7203 /* defaults */ 7204 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7205 mp[i] = NULL; 7206 mptmp[i] = PETSC_FALSE; 7207 rmapt[i] = -1; 7208 cmapt[i] = -1; 7209 rmapa[i] = NULL; 7210 cmapa[i] = NULL; 7211 } 7212 7213 /* customization */ 7214 PetscCall(PetscNew(&mmdata)); 7215 mmdata->reusesym = product->api_user; 7216 if (ptype == MATPRODUCT_AB) { 7217 if (product->api_user) { 7218 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7219 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7220 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7221 PetscOptionsEnd(); 7222 } else { 7223 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7224 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7225 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7226 PetscOptionsEnd(); 7227 } 7228 } else if (ptype == MATPRODUCT_PtAP) { 7229 if (product->api_user) { 7230 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7231 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, 
&mmdata->P_oth_bind, NULL)); 7232 PetscOptionsEnd(); 7233 } else { 7234 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7235 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7236 PetscOptionsEnd(); 7237 } 7238 } 7239 a = (Mat_MPIAIJ *)A->data; 7240 p = (Mat_MPIAIJ *)P->data; 7241 PetscCall(MatSetSizes(C, m, n, M, N)); 7242 PetscCall(PetscLayoutSetUp(C->rmap)); 7243 PetscCall(PetscLayoutSetUp(C->cmap)); 7244 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7245 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7246 7247 cp = 0; 7248 switch (ptype) { 7249 case MATPRODUCT_AB: /* A * P */ 7250 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7251 7252 /* A_diag * P_local (merged or not) */ 7253 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7254 /* P is product->B */ 7255 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7256 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7257 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7258 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7259 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7260 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7261 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7262 mp[cp]->product->api_user = product->api_user; 7263 PetscCall(MatProductSetFromOptions(mp[cp])); 7264 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7265 PetscCall(ISGetIndices(glob, &globidx)); 7266 rmapt[cp] = 1; 7267 cmapt[cp] = 2; 7268 cmapa[cp] = globidx; 7269 mptmp[cp] = PETSC_FALSE; 7270 cp++; 7271 } else { /* A_diag * P_diag and A_diag * P_off */ 7272 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7273 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7274 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7275 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7276 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7277 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7278 mp[cp]->product->api_user = product->api_user; 7279 PetscCall(MatProductSetFromOptions(mp[cp])); 7280 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7281 rmapt[cp] = 1; 7282 cmapt[cp] = 1; 7283 mptmp[cp] = PETSC_FALSE; 7284 cp++; 7285 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7286 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7287 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7288 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7289 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7290 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7291 mp[cp]->product->api_user = product->api_user; 7292 PetscCall(MatProductSetFromOptions(mp[cp])); 7293 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7294 rmapt[cp] = 1; 7295 cmapt[cp] = 2; 7296 cmapa[cp] = p->garray; 7297 mptmp[cp] = PETSC_FALSE; 7298 cp++; 7299 } 7300 7301 /* A_off * P_other */ 7302 if (mmdata->P_oth) { 7303 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7304 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7305 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7306 
PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7307 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7308 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7309 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7310 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7311 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7312 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7313 mp[cp]->product->api_user = product->api_user; 7314 PetscCall(MatProductSetFromOptions(mp[cp])); 7315 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7316 rmapt[cp] = 1; 7317 cmapt[cp] = 2; 7318 cmapa[cp] = P_oth_idx; 7319 mptmp[cp] = PETSC_FALSE; 7320 cp++; 7321 } 7322 break; 7323 7324 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7325 /* A is product->B */ 7326 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7327 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7328 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7329 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7330 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7331 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7332 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7333 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7334 mp[cp]->product->api_user = product->api_user; 7335 PetscCall(MatProductSetFromOptions(mp[cp])); 7336 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7337 PetscCall(ISGetIndices(glob, &globidx)); 7338 rmapt[cp] = 2; 7339 rmapa[cp] = globidx; 7340 cmapt[cp] = 2; 7341 cmapa[cp] = globidx; 7342 mptmp[cp] = PETSC_FALSE; 7343 cp++; 7344 } else { 7345 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7346 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7347 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7348 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7349 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7350 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7351 mp[cp]->product->api_user = product->api_user; 7352 PetscCall(MatProductSetFromOptions(mp[cp])); 7353 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7354 PetscCall(ISGetIndices(glob, &globidx)); 7355 rmapt[cp] = 1; 7356 cmapt[cp] = 2; 7357 cmapa[cp] = globidx; 7358 mptmp[cp] = PETSC_FALSE; 7359 cp++; 7360 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7361 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7362 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7363 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7364 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7365 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7366 mp[cp]->product->api_user = product->api_user; 7367 PetscCall(MatProductSetFromOptions(mp[cp])); 7368 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7369 rmapt[cp] = 2; 7370 rmapa[cp] = p->garray; 7371 cmapt[cp] = 2; 7372 cmapa[cp] = globidx; 7373 mptmp[cp] = PETSC_FALSE; 7374 cp++; 7375 } 7376 break; 7377 case MATPRODUCT_PtAP: 7378 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7379 /* P is product->B */ 7380 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7381 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7382 
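    /* the product configured below computes Bloc^t * A_diag * Bloc, i.e. the contribution of A's diagonal block to P^t*A*P (Bloc merges P's diagonal and off-diagonal blocks) */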
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7383 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7384 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7385 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7386 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7387 mp[cp]->product->api_user = product->api_user; 7388 PetscCall(MatProductSetFromOptions(mp[cp])); 7389 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7390 PetscCall(ISGetIndices(glob, &globidx)); 7391 rmapt[cp] = 2; 7392 rmapa[cp] = globidx; 7393 cmapt[cp] = 2; 7394 cmapa[cp] = globidx; 7395 mptmp[cp] = PETSC_FALSE; 7396 cp++; 7397 if (mmdata->P_oth) { 7398 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7399 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7400 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7401 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7402 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7403 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7404 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7405 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7406 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7407 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7408 mp[cp]->product->api_user = product->api_user; 7409 PetscCall(MatProductSetFromOptions(mp[cp])); 7410 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7411 mptmp[cp] = PETSC_TRUE; 7412 cp++; 7413 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7414 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7415 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7416 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7417 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7418 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7419 mp[cp]->product->api_user = product->api_user; 7420 PetscCall(MatProductSetFromOptions(mp[cp])); 7421 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7422 rmapt[cp] = 2; 7423 rmapa[cp] = globidx; 7424 cmapt[cp] = 2; 7425 cmapa[cp] = P_oth_idx; 7426 mptmp[cp] = PETSC_FALSE; 7427 cp++; 7428 } 7429 break; 7430 default: 7431 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7432 } 7433 /* sanity check */ 7434 if (size > 1) 7435 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7436 7437 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7438 for (i = 0; i < cp; i++) { 7439 mmdata->mp[i] = mp[i]; 7440 mmdata->mptmp[i] = mptmp[i]; 7441 } 7442 mmdata->cp = cp; 7443 C->product->data = mmdata; 7444 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7445 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7446 7447 /* memory type */ 7448 mmdata->mtype = PETSC_MEMTYPE_HOST; 7449 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7450 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7451 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7452 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7453 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7454 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7455 7456 /* prepare coo 
coordinates for values insertion */ 7457 7458 /* count total nonzeros of those intermediate seqaij Mats 7459 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7460 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7461 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7462 */ 7463 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7464 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7465 if (mptmp[cp]) continue; 7466 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */ 7467 const PetscInt *rmap = rmapa[cp]; 7468 const PetscInt mr = mp[cp]->rmap->n; 7469 const PetscInt rs = C->rmap->rstart; 7470 const PetscInt re = C->rmap->rend; 7471 const PetscInt *ii = mm->i; 7472 for (i = 0; i < mr; i++) { 7473 const PetscInt gr = rmap[i]; 7474 const PetscInt nz = ii[i + 1] - ii[i]; 7475 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7476 else ncoo_oown += nz; /* this row is local */ 7477 } 7478 } else ncoo_d += mm->nz; 7479 } 7480 7481 /* 7482 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7483 7484 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs. 7485 7486 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7487 7488 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7489 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7490 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7491 7492 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7493 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
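       For example, with cp = 2 intermediate products, off[] holds cp+1 = 3 pointers into one shared index array: off[0] .. off[1]-1 are the
       locations (in mp[0]'s value array) of nonzeros that mp[0] sends to other processes, off[1] .. off[2]-1 are those of mp[1], and off[2]
       marks the end; own[] is laid out the same way for the locations inserted locally. This is why both arrays are allocated with cp+1 entries below.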
7494 */ 7495 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7496 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7497 7498 /* gather (i,j) of nonzeros inserted by remote procs */ 7499 if (hasoffproc) { 7500 PetscSF msf; 7501 PetscInt ncoo2, *coo_i2, *coo_j2; 7502 7503 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7504 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7505 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7506 7507 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7508 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7509 PetscInt *idxoff = mmdata->off[cp]; 7510 PetscInt *idxown = mmdata->own[cp]; 7511 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7512 const PetscInt *rmap = rmapa[cp]; 7513 const PetscInt *cmap = cmapa[cp]; 7514 const PetscInt *ii = mm->i; 7515 PetscInt *coi = coo_i + ncoo_o; 7516 PetscInt *coj = coo_j + ncoo_o; 7517 const PetscInt mr = mp[cp]->rmap->n; 7518 const PetscInt rs = C->rmap->rstart; 7519 const PetscInt re = C->rmap->rend; 7520 const PetscInt cs = C->cmap->rstart; 7521 for (i = 0; i < mr; i++) { 7522 const PetscInt *jj = mm->j + ii[i]; 7523 const PetscInt gr = rmap[i]; 7524 const PetscInt nz = ii[i + 1] - ii[i]; 7525 if (gr < rs || gr >= re) { /* this is an offproc row */ 7526 for (j = ii[i]; j < ii[i + 1]; j++) { 7527 *coi++ = gr; 7528 *idxoff++ = j; 7529 } 7530 if (!cmapt[cp]) { /* already global */ 7531 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7532 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7533 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7534 } else { /* offdiag */ 7535 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7536 } 7537 ncoo_o += nz; 7538 } else { /* this is a local row */ 7539 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7540 } 7541 } 7542 } 7543 mmdata->off[cp + 1] = idxoff; 7544 mmdata->own[cp + 1] = idxown; 7545 } 7546 7547 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7548 PetscInt incoo_o; 7549 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7550 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7551 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7552 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7553 ncoo = ncoo_d + ncoo_oown + ncoo2; 7554 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7555 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7556 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7557 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7558 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7559 PetscCall(PetscFree2(coo_i, coo_j)); 7560 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7561 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7562 coo_i = coo_i2; 7563 coo_j = coo_j2; 7564 } else { /* no offproc values insertion */ 7565 ncoo = ncoo_d; 7566 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7567 7568 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7569 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7570 PetscCall(PetscSFSetUp(mmdata->sf)); 7571 } 7572 
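  /* at this point mmdata->sf (empty when there is no off-process insertion) maps each entry this process must send onto the owning row of C;
     it is also used below with PetscSFMalloc() to allocate the COO value buffer in the right memory space */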
mmdata->hasoffproc = hasoffproc; 7573 7574 /* gather (i,j) of nonzeros inserted locally */ 7575 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7576 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7577 PetscInt *coi = coo_i + ncoo_d; 7578 PetscInt *coj = coo_j + ncoo_d; 7579 const PetscInt *jj = mm->j; 7580 const PetscInt *ii = mm->i; 7581 const PetscInt *cmap = cmapa[cp]; 7582 const PetscInt *rmap = rmapa[cp]; 7583 const PetscInt mr = mp[cp]->rmap->n; 7584 const PetscInt rs = C->rmap->rstart; 7585 const PetscInt re = C->rmap->rend; 7586 const PetscInt cs = C->cmap->rstart; 7587 7588 if (mptmp[cp]) continue; 7589 if (rmapt[cp] == 1) { /* consecutive rows */ 7590 /* fill coo_i */ 7591 for (i = 0; i < mr; i++) { 7592 const PetscInt gr = i + rs; 7593 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7594 } 7595 /* fill coo_j */ 7596 if (!cmapt[cp]) { /* type-0, already global */ 7597 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7598 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7599 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7600 } else { /* type-2, local to global for sparse columns */ 7601 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7602 } 7603 ncoo_d += mm->nz; 7604 } else if (rmapt[cp] == 2) { /* sparse rows */ 7605 for (i = 0; i < mr; i++) { 7606 const PetscInt *jj = mm->j + ii[i]; 7607 const PetscInt gr = rmap[i]; 7608 const PetscInt nz = ii[i + 1] - ii[i]; 7609 if (gr >= rs && gr < re) { /* local rows */ 7610 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7611 if (!cmapt[cp]) { /* type-0, already global */ 7612 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7613 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7614 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7615 } else { /* type-2, local to global for sparse columns */ 7616 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7617 } 7618 ncoo_d += nz; 7619 } 7620 } 7621 } 7622 } 7623 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7624 PetscCall(ISDestroy(&glob)); 7625 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7626 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7627 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7628 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7629 7630 /* set block sizes */ 7631 A = product->A; 7632 P = product->B; 7633 switch (ptype) { 7634 case MATPRODUCT_PtAP: 7635 if (P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs)); 7636 break; 7637 case MATPRODUCT_RARt: 7638 if (P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs)); 7639 break; 7640 case MATPRODUCT_ABC: 7641 PetscCall(MatSetBlockSizesFromMats(C, A, product->C)); 7642 break; 7643 case MATPRODUCT_AB: 7644 PetscCall(MatSetBlockSizesFromMats(C, A, P)); 7645 break; 7646 case MATPRODUCT_AtB: 7647 if (A->cmap->bs > 1 || P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs)); 7648 break; 7649 case MATPRODUCT_ABt: 7650 if (A->rmap->bs > 1 || P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs)); 7651 break; 7652 default: 7653 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]); 7654 } 7655 7656 /* preallocate with COO data */ 7657 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7658 PetscCall(PetscFree2(coo_i, coo_j)); 7659 PetscFunctionReturn(PETSC_SUCCESS); 7660 
} 7661 7662 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7663 { 7664 Mat_Product *product = mat->product; 7665 #if defined(PETSC_HAVE_DEVICE) 7666 PetscBool match = PETSC_FALSE; 7667 PetscBool usecpu = PETSC_FALSE; 7668 #else 7669 PetscBool match = PETSC_TRUE; 7670 #endif 7671 7672 PetscFunctionBegin; 7673 MatCheckProduct(mat, 1); 7674 #if defined(PETSC_HAVE_DEVICE) 7675 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7676 if (match) { /* we can always fallback to the CPU if requested */ 7677 switch (product->type) { 7678 case MATPRODUCT_AB: 7679 if (product->api_user) { 7680 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7681 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7682 PetscOptionsEnd(); 7683 } else { 7684 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7685 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7686 PetscOptionsEnd(); 7687 } 7688 break; 7689 case MATPRODUCT_AtB: 7690 if (product->api_user) { 7691 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7692 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7693 PetscOptionsEnd(); 7694 } else { 7695 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7696 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7697 PetscOptionsEnd(); 7698 } 7699 break; 7700 case MATPRODUCT_PtAP: 7701 if (product->api_user) { 7702 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7703 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7704 PetscOptionsEnd(); 7705 } else { 7706 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7707 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7708 PetscOptionsEnd(); 7709 } 7710 break; 7711 default: 7712 break; 7713 } 7714 match = (PetscBool)!usecpu; 7715 } 7716 #endif 7717 if (match) { 7718 switch (product->type) { 7719 case MATPRODUCT_AB: 7720 case MATPRODUCT_AtB: 7721 case MATPRODUCT_PtAP: 7722 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7723 break; 7724 default: 7725 break; 7726 } 7727 } 7728 /* fallback to MPIAIJ ops */ 7729 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7730 PetscFunctionReturn(PETSC_SUCCESS); 7731 } 7732 7733 /* 7734 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7735 7736 n - the number of block indices in cc[] 7737 cc - the block indices (must be large enough to contain the indices) 7738 */ 7739 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7740 { 7741 PetscInt cnt = -1, nidx, j; 7742 const PetscInt *idx; 7743 7744 PetscFunctionBegin; 7745 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7746 if (nidx) { 7747 cnt = 0; 7748 cc[cnt] = idx[0] / bs; 7749 
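    /* column indices returned by MatGetRow() on an AIJ matrix are sorted, so equal block indices are adjacent; record a block index only when it changes */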
for (j = 1; j < nidx; j++) { 7750 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7751 } 7752 } 7753 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7754 *n = cnt + 1; 7755 PetscFunctionReturn(PETSC_SUCCESS); 7756 } 7757 7758 /* 7759 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7760 7761 ncollapsed - the number of block indices 7762 collapsed - the block indices (must be large enough to contain the indices) 7763 */ 7764 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7765 { 7766 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7767 7768 PetscFunctionBegin; 7769 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7770 for (i = start + 1; i < start + bs; i++) { 7771 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7772 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7773 cprevtmp = cprev; 7774 cprev = merged; 7775 merged = cprevtmp; 7776 } 7777 *ncollapsed = nprev; 7778 if (collapsed) *collapsed = cprev; 7779 PetscFunctionReturn(PETSC_SUCCESS); 7780 } 7781 7782 /* 7783 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7784 7785 Input Parameter: 7786 . Amat - matrix 7787 - symmetrize - make the result symmetric 7788 + scale - scale with diagonal 7789 7790 Output Parameter: 7791 . a_Gmat - output scalar graph >= 0 7792 7793 */ 7794 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7795 { 7796 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7797 MPI_Comm comm; 7798 Mat Gmat; 7799 PetscBool ismpiaij, isseqaij; 7800 Mat a, b, c; 7801 MatType jtype; 7802 7803 PetscFunctionBegin; 7804 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7805 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7806 PetscCall(MatGetSize(Amat, &MM, &NN)); 7807 PetscCall(MatGetBlockSize(Amat, &bs)); 7808 nloc = (Iend - Istart) / bs; 7809 7810 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7811 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7812 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7813 7814 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7815 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7816 implementation */ 7817 if (bs > 1) { 7818 PetscCall(MatGetType(Amat, &jtype)); 7819 PetscCall(MatCreate(comm, &Gmat)); 7820 PetscCall(MatSetType(Gmat, jtype)); 7821 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7822 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7823 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7824 PetscInt *d_nnz, *o_nnz; 7825 MatScalar *aa, val, *AA; 7826 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7827 7828 if (isseqaij) { 7829 a = Amat; 7830 b = NULL; 7831 } else { 7832 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7833 a = d->A; 7834 b = d->B; 7835 } 7836 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7837 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7838 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7839 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7840 const PetscInt *cols1, *cols2; 7841 7842 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7843 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7844 nnz[brow / bs] = nc2 / bs; 7845 if (nc2 % bs) ok = 0; 7846 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7847 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7848 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7849 if (nc1 != nc2) ok = 0; 7850 else { 7851 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7852 if (cols1[jj] != cols2[jj]) ok = 0; 7853 if (cols1[jj] % bs != jj % bs) ok = 0; 7854 } 7855 } 7856 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7857 } 7858 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7859 if (!ok) { 7860 PetscCall(PetscFree2(d_nnz, o_nnz)); 7861 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7862 goto old_bs; 7863 } 7864 } 7865 } 7866 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7867 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7868 PetscCall(PetscFree2(d_nnz, o_nnz)); 7869 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7870 // diag 7871 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7872 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7873 7874 ai = aseq->i; 7875 n = ai[brow + 1] - ai[brow]; 7876 aj = aseq->j + ai[brow]; 7877 for (PetscInt k = 0; k < n; k += bs) { // block columns 7878 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7879 val = 0; 7880 if (index_size == 0) { 7881 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7882 aa = aseq->a + ai[brow + ii] + k; 7883 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7884 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7885 } 7886 } 7887 } else { // use (index,index) value if provided 7888 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7889 PetscInt ii = index[iii]; 7890 aa = aseq->a + ai[brow + ii] + k; 7891 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7892 PetscInt jj = index[jjj]; 7893 val += PetscAbs(PetscRealPart(aa[jj])); 7894 } 7895 } 7896 } 7897 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7898 AA[k / bs] = val; 7899 } 7900 grow = Istart / bs + brow / bs; 7901 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7902 } 7903 // off-diag 7904 if (ismpiaij) { 7905 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7906 const PetscScalar *vals; 7907 const PetscInt *cols, *garray = aij->garray; 7908 7909 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7910 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7911 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7912 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7913 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7914 AA[k / bs] = 0; 7915 AJ[cidx] = garray[cols[k]] / bs; 7916 } 7917 nc = ncols / bs; 7918 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7919 if (index_size == 0) { 7920 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7921 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7922 for (PetscInt k = 0; k < ncols; k += bs) { 7923 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7924 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7925 AA[k / bs] 
+= PetscAbs(PetscRealPart(vals[k + jj])); 7926 } 7927 } 7928 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7929 } 7930 } else { // use (index,index) value if provided 7931 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7932 PetscInt ii = index[iii]; 7933 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7934 for (PetscInt k = 0; k < ncols; k += bs) { 7935 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7936 PetscInt jj = index[jjj]; 7937 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7938 } 7939 } 7940 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7941 } 7942 } 7943 grow = Istart / bs + brow / bs; 7944 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7945 } 7946 } 7947 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7948 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7949 PetscCall(PetscFree2(AA, AJ)); 7950 } else { 7951 const PetscScalar *vals; 7952 const PetscInt *idx; 7953 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7954 old_bs: 7955 /* 7956 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7957 */ 7958 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7959 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7960 if (isseqaij) { 7961 PetscInt max_d_nnz; 7962 7963 /* 7964 Determine exact preallocation count for (sequential) scalar matrix 7965 */ 7966 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7967 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7968 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7969 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7970 PetscCall(PetscFree3(w0, w1, w2)); 7971 } else if (ismpiaij) { 7972 Mat Daij, Oaij; 7973 const PetscInt *garray; 7974 PetscInt max_d_nnz; 7975 7976 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7977 /* 7978 Determine exact preallocation count for diagonal block portion of scalar matrix 7979 */ 7980 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7981 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7982 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7983 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7984 PetscCall(PetscFree3(w0, w1, w2)); 7985 /* 7986 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7987 */ 7988 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7989 o_nnz[jj] = 0; 7990 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7991 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7992 o_nnz[jj] += ncols; 7993 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7994 } 7995 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7996 } 7997 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7998 /* get scalar copy (norms) of matrix */ 7999 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 8000 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 8001 PetscCall(PetscFree2(d_nnz, o_nnz)); 8002 for (Ii = Istart; Ii < Iend; Ii++) { 8003 PetscInt dest_row = Ii / bs; 8004 8005 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 8006 for (jj = 0; jj < ncols; jj++) { 8007 PetscInt dest_col = idx[jj] / bs; 8008 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 8009 8010 PetscCall(MatSetValues(Gmat, 1, 
&dest_row, 1, &dest_col, &sv, ADD_VALUES)); 8011 } 8012 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 8013 } 8014 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8015 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8016 } 8017 } else { 8018 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8019 else { 8020 Gmat = Amat; 8021 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8022 } 8023 if (isseqaij) { 8024 a = Gmat; 8025 b = NULL; 8026 } else { 8027 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8028 a = d->A; 8029 b = d->B; 8030 } 8031 if (filter >= 0 || scale) { 8032 /* take absolute value of each entry */ 8033 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8034 MatInfo info; 8035 PetscScalar *avals; 8036 8037 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8038 PetscCall(MatSeqAIJGetArray(c, &avals)); 8039 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8040 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8041 } 8042 } 8043 } 8044 if (symmetrize) { 8045 PetscBool isset, issym; 8046 8047 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8048 if (!isset || !issym) { 8049 Mat matTrans; 8050 8051 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8052 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8053 PetscCall(MatDestroy(&matTrans)); 8054 } 8055 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8056 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8057 if (scale) { 8058 /* scale c for all diagonal values = 1 or -1 */ 8059 Vec diag; 8060 8061 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8062 PetscCall(MatGetDiagonal(Gmat, diag)); 8063 PetscCall(VecReciprocal(diag)); 8064 PetscCall(VecSqrtAbs(diag)); 8065 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8066 PetscCall(VecDestroy(&diag)); 8067 } 8068 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8069 if (filter >= 0) { 8070 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8071 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8072 } 8073 *a_Gmat = Gmat; 8074 PetscFunctionReturn(PETSC_SUCCESS); 8075 } 8076 8077 /* 8078 Special version for direct calls from Fortran 8079 */ 8080 8081 /* Change these macros so can be used in void function */ 8082 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8083 #undef PetscCall 8084 #define PetscCall(...) \ 8085 do { \ 8086 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8087 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8088 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8089 return; \ 8090 } \ 8091 } while (0) 8092 8093 #undef SETERRQ 8094 #define SETERRQ(comm, ierr, ...) 
\ 8095 do { \ 8096 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8097 return; \ 8098 } while (0) 8099 8100 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8101 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8103 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8104 #else 8105 #endif 8106 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8107 { 8108 Mat mat = *mmat; 8109 PetscInt m = *mm, n = *mn; 8110 InsertMode addv = *maddv; 8111 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8112 PetscScalar value; 8113 8114 MatCheckPreallocated(mat, 1); 8115 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8116 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8117 { 8118 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8119 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8120 PetscBool roworiented = aij->roworiented; 8121 8122 /* Some Variables required in the macro */ 8123 Mat A = aij->A; 8124 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8125 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8126 MatScalar *aa; 8127 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8128 Mat B = aij->B; 8129 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8130 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8131 MatScalar *ba; 8132 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8133 * cannot use "#if defined" inside a macro. 
*/ 8134 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8135 8136 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8137 PetscInt nonew = a->nonew; 8138 MatScalar *ap1, *ap2; 8139 8140 PetscFunctionBegin; 8141 PetscCall(MatSeqAIJGetArray(A, &aa)); 8142 PetscCall(MatSeqAIJGetArray(B, &ba)); 8143 for (i = 0; i < m; i++) { 8144 if (im[i] < 0) continue; 8145 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8146 if (im[i] >= rstart && im[i] < rend) { 8147 row = im[i] - rstart; 8148 lastcol1 = -1; 8149 rp1 = aj + ai[row]; 8150 ap1 = aa + ai[row]; 8151 rmax1 = aimax[row]; 8152 nrow1 = ailen[row]; 8153 low1 = 0; 8154 high1 = nrow1; 8155 lastcol2 = -1; 8156 rp2 = bj + bi[row]; 8157 ap2 = ba + bi[row]; 8158 rmax2 = bimax[row]; 8159 nrow2 = bilen[row]; 8160 low2 = 0; 8161 high2 = nrow2; 8162 8163 for (j = 0; j < n; j++) { 8164 if (roworiented) value = v[i * n + j]; 8165 else value = v[i + j * m]; 8166 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8167 if (in[j] >= cstart && in[j] < cend) { 8168 col = in[j] - cstart; 8169 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8170 } else if (in[j] < 0) continue; 8171 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8172 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8173 } else { 8174 if (mat->was_assembled) { 8175 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8176 #if defined(PETSC_USE_CTABLE) 8177 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8178 col--; 8179 #else 8180 col = aij->colmap[in[j]] - 1; 8181 #endif 8182 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8183 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8184 col = in[j]; 8185 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8186 B = aij->B; 8187 b = (Mat_SeqAIJ *)B->data; 8188 bimax = b->imax; 8189 bi = b->i; 8190 bilen = b->ilen; 8191 bj = b->j; 8192 rp2 = bj + bi[row]; 8193 ap2 = ba + bi[row]; 8194 rmax2 = bimax[row]; 8195 nrow2 = bilen[row]; 8196 low2 = 0; 8197 high2 = nrow2; 8198 bm = aij->B->rmap->n; 8199 ba = b->a; 8200 inserted = PETSC_FALSE; 8201 } 8202 } else col = in[j]; 8203 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8204 } 8205 } 8206 } else if (!aij->donotstash) { 8207 if (roworiented) { 8208 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8209 } else { 8210 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8211 } 8212 } 8213 } 8214 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8215 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8216 } 8217 PetscFunctionReturnVoid(); 8218 } 8219 8220 /* Undefining these here since they were redefined from their original definition above! No 8221 * other PETSc functions should be defined past this point, as it is impossible to recover the 8222 * original definitions */ 8223 #undef PetscCall 8224 #undef SETERRQ 8225