1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 10 #define TYPE AIJ 11 #define TYPE_AIJ 12 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 13 #undef TYPE 14 #undef TYPE_AIJ 15 16 static PetscErrorCode MatReset_MPIAIJ(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 19 20 PetscFunctionBegin; 21 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 22 PetscCall(MatStashDestroy_Private(&mat->stash)); 23 PetscCall(VecDestroy(&aij->diag)); 24 PetscCall(MatDestroy(&aij->A)); 25 PetscCall(MatDestroy(&aij->B)); 26 #if defined(PETSC_USE_CTABLE) 27 PetscCall(PetscHMapIDestroy(&aij->colmap)); 28 #else 29 PetscCall(PetscFree(aij->colmap)); 30 #endif 31 PetscCall(PetscFree(aij->garray)); 32 PetscCall(VecDestroy(&aij->lvec)); 33 PetscCall(VecScatterDestroy(&aij->Mvctx)); 34 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 35 PetscCall(PetscFree(aij->ld)); 36 PetscFunctionReturn(PETSC_SUCCESS); 37 } 38 39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 42 /* Save the nonzero states of the component matrices because those are what are used to determine 43 the nonzero state of mat */ 44 PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate; 45 46 PetscFunctionBegin; 47 PetscCall(MatReset_MPIAIJ(mat)); 48 PetscCall(MatSetUp_MPI_Hash(mat)); 49 aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate; 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 54 { 55 PetscFunctionBegin; 56 PetscCall(MatReset_MPIAIJ(mat)); 57 58 PetscCall(PetscFree(mat->data)); 59 60 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 61 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 62 63 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 74 #if defined(PETSC_HAVE_CUDA) 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 76 #endif 77 #if defined(PETSC_HAVE_HIP) 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 79 #endif 80 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 139 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 140 for communicators controlling multiple processes. 
   It is recommended that you call both of the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also
   automatically switches over to use inodes when enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
*/ 186 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 187 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 192 { 193 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 194 195 PetscFunctionBegin; 196 if (mat->A) { 197 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 198 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 199 } 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 204 { 205 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 207 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 208 const PetscInt *ia, *ib; 209 const MatScalar *aa, *bb, *aav, *bav; 210 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 211 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 212 213 PetscFunctionBegin; 214 *keptrows = NULL; 215 216 ia = a->i; 217 ib = b->i; 218 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 220 for (i = 0; i < m; i++) { 221 na = ia[i + 1] - ia[i]; 222 nb = ib[i + 1] - ib[i]; 223 if (!na && !nb) { 224 cnt++; 225 goto ok1; 226 } 227 aa = aav + ia[i]; 228 for (j = 0; j < na; j++) { 229 if (aa[j] != 0.0) goto ok1; 230 } 231 bb = PetscSafePointerPlusOffset(bav, ib[i]); 232 for (j = 0; j < nb; j++) { 233 if (bb[j] != 0.0) goto ok1; 234 } 235 cnt++; 236 ok1:; 237 } 238 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 239 if (!n0rows) { 240 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 241 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 242 PetscFunctionReturn(PETSC_SUCCESS); 243 } 244 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 245 cnt = 0; 246 for (i = 0; i < m; i++) { 247 na = ia[i + 1] - ia[i]; 248 nb = ib[i + 1] - ib[i]; 249 if (!na && !nb) continue; 250 aa = aav + ia[i]; 251 for (j = 0; j < na; j++) { 252 if (aa[j] != 0.0) { 253 rows[cnt++] = rstart + i; 254 goto ok2; 255 } 256 } 257 bb = PetscSafePointerPlusOffset(bav, ib[i]); 258 for (j = 0; j < nb; j++) { 259 if (bb[j] != 0.0) { 260 rows[cnt++] = rstart + i; 261 goto ok2; 262 } 263 } 264 ok2:; 265 } 266 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 267 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 268 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 275 PetscBool cong; 276 277 PetscFunctionBegin; 278 PetscCall(MatHasCongruentLayouts(Y, &cong)); 279 if (Y->assembled && cong) { 280 PetscCall(MatDiagonalSet(aij->A, D, is)); 281 } else { 282 PetscCall(MatDiagonalSet_Default(Y, D, is)); 283 } 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 288 { 289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 290 PetscInt i, rstart, nrows, *rows; 291 292 PetscFunctionBegin; 293 *zrows = NULL; 294 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 295 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 296 for (i = 0; i < nrows; i++) rows[i] += rstart; 297 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal 
*reductions) 302 { 303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 304 PetscInt i, m, n, *garray = aij->garray; 305 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 306 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 307 PetscReal *work; 308 const PetscScalar *dummy; 309 310 PetscFunctionBegin; 311 PetscCall(MatGetSize(A, &m, &n)); 312 PetscCall(PetscCalloc1(n, &work)); 313 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 314 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 315 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 316 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 317 if (type == NORM_2) { 318 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 319 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 320 } else if (type == NORM_1) { 321 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 322 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 323 } else if (type == NORM_INFINITY) { 324 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 325 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 326 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 327 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 328 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 330 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 331 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 332 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 333 if (type == NORM_INFINITY) { 334 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 335 } else { 336 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 337 } 338 PetscCall(PetscFree(work)); 339 if (type == NORM_2) { 340 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 341 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 342 for (i = 0; i < n; i++) reductions[i] /= m; 343 } 344 PetscFunctionReturn(PETSC_SUCCESS); 345 } 346 347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 348 { 349 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 350 IS sis, gis; 351 const PetscInt *isis, *igis; 352 PetscInt n, *iis, nsis, ngis, rstart, i; 353 354 PetscFunctionBegin; 355 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 356 PetscCall(MatFindNonzeroRows(a->B, &gis)); 357 PetscCall(ISGetSize(gis, &ngis)); 358 PetscCall(ISGetSize(sis, &nsis)); 359 PetscCall(ISGetIndices(sis, &isis)); 360 PetscCall(ISGetIndices(gis, &igis)); 361 362 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 363 PetscCall(PetscArraycpy(iis, igis, ngis)); 364 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 365 n = ngis + nsis; 366 
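  /* sort the merged list of local indices and remove duplicates, then shift to the global row numbering below */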
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each processor
  has an order-N integer array) but access is fast.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
} \ 467 if (value == 0.0 && ignorezeroentries) { \ 468 low2 = 0; \ 469 high2 = nrow2; \ 470 goto b_noinsert; \ 471 } \ 472 if (nonew == 1) { \ 473 low2 = 0; \ 474 high2 = nrow2; \ 475 goto b_noinsert; \ 476 } \ 477 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 478 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 479 N = nrow2++ - 1; \ 480 b->nz++; \ 481 high2++; \ 482 /* shift up all the later entries in this row */ \ 483 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 484 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 485 rp2[_i] = col; \ 486 ap2[_i] = value; \ 487 b_noinsert:; \ 488 bilen[row] = nrow2; \ 489 } while (0) 490 491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 492 { 493 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 494 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 495 PetscInt l, *garray = mat->garray, diag; 496 PetscScalar *aa, *ba; 497 498 PetscFunctionBegin; 499 /* code only works for square matrices A */ 500 501 /* find size of row to the left of the diagonal part */ 502 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 503 row = row - diag; 504 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 505 if (garray[b->j[b->i[row] + l]] > diag) break; 506 } 507 if (l) { 508 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 509 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 510 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 511 } 512 513 /* diagonal part */ 514 if (a->i[row + 1] - a->i[row]) { 515 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 516 PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 517 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 518 } 519 520 /* right of diagonal part */ 521 if (b->i[row + 1] - b->i[row] - l) { 522 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 523 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 524 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 525 } 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 532 PetscScalar value = 0.0; 533 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 540 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 541 PetscBool ignorezeroentries = a->ignorezeroentries; 542 Mat B = aij->B; 543 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 544 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 545 MatScalar *aa, *ba; 546 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 547 PetscInt nonew; 548 MatScalar *ap1, *ap2; 549 550 PetscFunctionBegin; 551 PetscCall(MatSeqAIJGetArray(A, &aa)); 552 PetscCall(MatSeqAIJGetArray(B, &ba)); 553 for (i = 0; i < m; i++) { 554 if (im[i] < 0) continue; 555 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: 
row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 lastcol1 = -1; 559 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 560 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 561 rmax1 = aimax[row]; 562 nrow1 = ailen[row]; 563 low1 = 0; 564 high1 = nrow1; 565 lastcol2 = -1; 566 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 567 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 568 rmax2 = bimax[row]; 569 nrow2 = bilen[row]; 570 low2 = 0; 571 high2 = nrow2; 572 573 for (j = 0; j < n; j++) { 574 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 576 if (in[j] >= cstart && in[j] < cend) { 577 col = in[j] - cstart; 578 nonew = a->nonew; 579 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 580 } else if (in[j] < 0) { 581 continue; 582 } else { 583 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 584 if (mat->was_assembled) { 585 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 586 #if defined(PETSC_USE_CTABLE) 587 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 593 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */ 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ *)B->data; 598 bimax = b->imax; 599 bi = b->i; 600 bilen = b->ilen; 601 bj = b->j; 602 ba = b->a; 603 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 604 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 605 rmax2 = bimax[row]; 606 nrow2 = bilen[row]; 607 low2 = 0; 608 high2 = nrow2; 609 bm = aij->B->rmap->n; 610 ba = b->a; 611 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 612 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 613 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 614 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 615 } 616 } else col = in[j]; 617 nonew = b->nonew; 618 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 619 } 620 } 621 } else { 622 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 623 if (!aij->donotstash) { 624 mat->assembled = PETSC_FALSE; 625 if (roworiented) { 626 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 627 } else { 628 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 629 } 630 } 631 } 632 } 633 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. 
But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be correct and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
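     (they are filled in later by MatSetValues_MPIAIJ(), as described in the comment above this function)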
*/ 698 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 699 PetscScalar *aa = a->a, *ba = b->a; 700 701 PetscFunctionBegin; 702 /* Iterate over all rows of the matrix */ 703 for (j = 0; j < am; j++) { 704 dnz_row = onz_row = 0; 705 rowstart_offd = full_offd_i[j]; 706 rowstart_diag = full_diag_i[j]; 707 /* Iterate over all non-zero columns of the current row */ 708 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 709 /* If column is in the diagonal */ 710 if (mat_j[col] >= cstart && mat_j[col] < cend) { 711 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 712 aa[rowstart_diag + dnz_row] = mat_a[col]; 713 dnz_row++; 714 } else { /* off-diagonal entries */ 715 bj[rowstart_offd + onz_row] = mat_j[col]; 716 ba[rowstart_offd + onz_row] = mat_a[col]; 717 onz_row++; 718 } 719 } 720 ailen[j] = dnz_row; 721 bilen[j] = onz_row; 722 } 723 PetscFunctionReturn(PETSC_SUCCESS); 724 } 725 726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 729 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 730 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 731 732 PetscFunctionBegin; 733 for (i = 0; i < m; i++) { 734 if (idxm[i] < 0) continue; /* negative row */ 735 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 736 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 737 row = idxm[i] - rstart; 738 for (j = 0; j < n; j++) { 739 if (idxn[j] < 0) continue; /* negative column */ 740 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 741 if (idxn[j] >= cstart && idxn[j] < cend) { 742 col = idxn[j] - cstart; 743 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 744 } else { 745 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 746 #if defined(PETSC_USE_CTABLE) 747 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 753 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 754 } 755 } 756 } 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 761 { 762 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 763 PetscInt nstash, reallocs; 764 765 PetscFunctionBegin; 766 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 767 768 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 769 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 770 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 771 PetscFunctionReturn(PETSC_SUCCESS); 772 } 773 774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 775 { 776 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 777 PetscMPIInt n; 778 PetscInt i, j, rstart, ncols, flg; 779 PetscInt *row, *col; 780 PetscBool other_disassembled; 781 PetscScalar *val; 782 783 /* do not use 'b 
= (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 784 785 PetscFunctionBegin; 786 if (!aij->donotstash && !mat->nooffprocentries) { 787 while (1) { 788 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 789 if (!flg) break; 790 791 for (i = 0; i < n;) { 792 /* Now identify the consecutive vals belonging to the same row */ 793 for (j = i, rstart = row[j]; j < n; j++) { 794 if (row[j] != rstart) break; 795 } 796 if (j < n) ncols = j - i; 797 else ncols = n - i; 798 /* Now assemble all these values with a single function call */ 799 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 800 i = j; 801 } 802 } 803 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 804 } 805 #if defined(PETSC_HAVE_DEVICE) 806 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 807 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 808 if (mat->boundtocpu) { 809 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 810 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 811 } 812 #endif 813 PetscCall(MatAssemblyBegin(aij->A, mode)); 814 PetscCall(MatAssemblyEnd(aij->A, mode)); 815 816 /* determine if any processor has disassembled, if so we must 817 also disassemble ourself, in order that we may reassemble. */ 818 /* 819 if nonzero structure of submatrix B cannot change then we know that 820 no processor disassembled thus we can skip this stuff 821 */ 822 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 823 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 824 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 825 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 826 } 827 } 828 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 829 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 830 #if defined(PETSC_HAVE_DEVICE) 831 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 832 #endif 833 PetscCall(MatAssemblyBegin(aij->B, mode)); 834 PetscCall(MatAssemblyEnd(aij->B, mode)); 835 836 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 837 838 aij->rowvalues = NULL; 839 840 PetscCall(VecDestroy(&aij->diag)); 841 842 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 843 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 844 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 845 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 846 } 847 #if defined(PETSC_HAVE_DEVICE) 848 mat->offloadmask = PETSC_OFFLOAD_BOTH; 849 #endif 850 PetscFunctionReturn(PETSC_SUCCESS); 851 } 852 853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 854 { 855 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 856 857 PetscFunctionBegin; 858 PetscCall(MatZeroEntries(l->A)); 859 PetscCall(MatZeroEntries(l->B)); 860 PetscFunctionReturn(PETSC_SUCCESS); 861 } 862 863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 864 { 865 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 866 PetscInt *lrows; 867 PetscInt r, len; 868 
PetscBool cong; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 873 PetscCall(MatHasCongruentLayouts(A, &cong)); 874 /* fix right-hand side if needed */ 875 if (x && b) { 876 const PetscScalar *xx; 877 PetscScalar *bb; 878 879 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 880 PetscCall(VecGetArrayRead(x, &xx)); 881 PetscCall(VecGetArray(b, &bb)); 882 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 883 PetscCall(VecRestoreArrayRead(x, &xx)); 884 PetscCall(VecRestoreArray(b, &bb)); 885 } 886 887 if (diag != 0.0 && cong) { 888 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 889 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 890 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 891 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 892 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 893 PetscInt nnwA, nnwB; 894 PetscBool nnzA, nnzB; 895 896 nnwA = aijA->nonew; 897 nnwB = aijB->nonew; 898 nnzA = aijA->keepnonzeropattern; 899 nnzB = aijB->keepnonzeropattern; 900 if (!nnzA) { 901 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 902 aijA->nonew = 0; 903 } 904 if (!nnzB) { 905 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 906 aijB->nonew = 0; 907 } 908 /* Must zero here before the next loop */ 909 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 910 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 911 for (r = 0; r < len; ++r) { 912 const PetscInt row = lrows[r] + A->rmap->rstart; 913 if (row >= A->cmap->N) continue; 914 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 915 } 916 aijA->nonew = nnwA; 917 aijB->nonew = nnwB; 918 } else { 919 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 920 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 921 } 922 PetscCall(PetscFree(lrows)); 923 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 924 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 928 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 929 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 930 } 931 PetscFunctionReturn(PETSC_SUCCESS); 932 } 933 934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 935 { 936 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 937 PetscInt n = A->rmap->n; 938 PetscInt i, j, r, m, len = 0; 939 PetscInt *lrows, *owners = A->rmap->range; 940 PetscMPIInt p = 0; 941 PetscSFNode *rrows; 942 PetscSF sf; 943 const PetscScalar *xx; 944 PetscScalar *bb, *mask, *aij_a; 945 Vec xmask, lmask; 946 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 947 const PetscInt *aj, *ii, *ridx; 948 PetscScalar *aa; 949 950 PetscFunctionBegin; 951 /* Create SF where leaves are input rows and roots are owned rows */ 952 PetscCall(PetscMalloc1(n, &lrows)); 953 for (r = 0; r < n; ++r) lrows[r] = -1; 954 PetscCall(PetscMalloc1(N, &rrows)); 955 for (r = 0; r < N; ++r) { 956 
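    /* find the rank that owns this global row (reusing the previously found owner when possible) and record the row's local index on that rank */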
const PetscInt idx = rows[r]; 957 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 958 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 959 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 960 } 961 rrows[r].rank = p; 962 rrows[r].index = rows[r] - owners[p]; 963 } 964 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 965 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 966 /* Collect flags for rows to be zeroed */ 967 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 968 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 969 PetscCall(PetscSFDestroy(&sf)); 970 /* Compress and put in row numbers */ 971 for (r = 0; r < n; ++r) 972 if (lrows[r] >= 0) lrows[len++] = r; 973 /* zero diagonal part of matrix */ 974 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 975 /* handle off-diagonal part of matrix */ 976 PetscCall(MatCreateVecs(A, &xmask, NULL)); 977 PetscCall(VecDuplicate(l->lvec, &lmask)); 978 PetscCall(VecGetArray(xmask, &bb)); 979 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 980 PetscCall(VecRestoreArray(xmask, &bb)); 981 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 982 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 983 PetscCall(VecDestroy(&xmask)); 984 if (x && b) { /* this code is buggy when the row and column layout don't match */ 985 PetscBool cong; 986 987 PetscCall(MatHasCongruentLayouts(A, &cong)); 988 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 989 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 990 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 991 PetscCall(VecGetArrayRead(l->lvec, &xx)); 992 PetscCall(VecGetArray(b, &bb)); 993 } 994 PetscCall(VecGetArray(lmask, &mask)); 995 /* remove zeroed rows of off-diagonal matrix */ 996 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 997 ii = aij->i; 998 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 999 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1000 if (aij->compressedrow.use) { 1001 m = aij->compressedrow.nrows; 1002 ii = aij->compressedrow.i; 1003 ridx = aij->compressedrow.rindex; 1004 for (i = 0; i < m; i++) { 1005 n = ii[i + 1] - ii[i]; 1006 aj = aij->j + ii[i]; 1007 aa = aij_a + ii[i]; 1008 1009 for (j = 0; j < n; j++) { 1010 if (PetscAbsScalar(mask[*aj])) { 1011 if (b) bb[*ridx] -= *aa * xx[*aj]; 1012 *aa = 0.0; 1013 } 1014 aa++; 1015 aj++; 1016 } 1017 ridx++; 1018 } 1019 } else { /* do not use compressed row format */ 1020 m = l->B->rmap->n; 1021 for (i = 0; i < m; i++) { 1022 n = ii[i + 1] - ii[i]; 1023 aj = aij->j + ii[i]; 1024 aa = aij_a + ii[i]; 1025 for (j = 0; j < n; j++) { 1026 if (PetscAbsScalar(mask[*aj])) { 1027 if (b) bb[i] -= *aa * xx[*aj]; 1028 *aa = 0.0; 1029 } 1030 aa++; 1031 aj++; 1032 } 1033 } 1034 } 1035 if (x && b) { 1036 PetscCall(VecRestoreArray(b, &bb)); 1037 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1038 } 1039 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1040 PetscCall(VecRestoreArray(lmask, &mask)); 1041 PetscCall(VecDestroy(&lmask)); 1042 PetscCall(PetscFree(lrows)); 1043 1044 /* only change matrix 
nonzero state if pattern was allowed to be changed */ 1045 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1046 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1047 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1048 } 1049 PetscFunctionReturn(PETSC_SUCCESS); 1050 } 1051 1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1055 PetscInt nt; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 PetscCall(VecGetLocalSize(xx, &nt)); 1060 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1061 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1062 PetscUseTypeMethod(a->A, mult, xx, yy); 1063 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1071 1072 PetscFunctionBegin; 1073 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1074 PetscFunctionReturn(PETSC_SUCCESS); 1075 } 1076 1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1078 { 1079 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1080 VecScatter Mvctx = a->Mvctx; 1081 1082 PetscFunctionBegin; 1083 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1084 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1085 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1086 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1087 PetscFunctionReturn(PETSC_SUCCESS); 1088 } 1089 1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1091 { 1092 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1093 1094 PetscFunctionBegin; 1095 /* do nondiagonal part */ 1096 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1097 /* do local part */ 1098 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1099 /* add partial results together */ 1100 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1101 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1102 PetscFunctionReturn(PETSC_SUCCESS); 1103 } 1104 1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1106 { 1107 MPI_Comm comm; 1108 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1109 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1110 IS Me, Notme; 1111 PetscInt M, N, first, last, *notme, i; 1112 PetscBool lf; 1113 PetscMPIInt size; 1114 1115 PetscFunctionBegin; 1116 /* Easy test: symmetric diagonal block */ 1117 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1118 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1119 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1120 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1121 PetscCallMPI(MPI_Comm_size(comm, &size)); 1122 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1123 1124 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
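     Each process extracts from Amat the block formed by its owned rows and the columns it does not own, and from Bmat the
     block with those roles reversed, then checks that the two pieces are transposes of each other.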
  */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block.
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0)
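    /* only rank 0 received the reduced global nonzero count, so only it fills in header[3] */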
PetscCall(PetscIntCast(hnz, &header[3])); 1216 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1217 1218 /* fill in and store row lengths */ 1219 PetscCall(PetscMalloc1(m, &rowlens)); 1220 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1221 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1222 PetscCall(PetscFree(rowlens)); 1223 1224 /* fill in and store column indices */ 1225 PetscCall(PetscMalloc1(nz, &colidxs)); 1226 for (cnt = 0, i = 0; i < m; i++) { 1227 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1228 if (garray[B->j[jb]] > cs) break; 1229 colidxs[cnt++] = garray[B->j[jb]]; 1230 } 1231 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1232 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1233 } 1234 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1235 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1236 PetscCall(PetscFree(colidxs)); 1237 1238 /* fill in and store nonzero values */ 1239 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1240 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1241 PetscCall(PetscMalloc1(nz, &matvals)); 1242 for (cnt = 0, i = 0; i < m; i++) { 1243 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 matvals[cnt++] = ba[jb]; 1246 } 1247 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1248 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1249 } 1250 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1251 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1252 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1254 PetscCall(PetscFree(matvals)); 1255 1256 /* write block size option to the viewer's .info file */ 1257 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 1261 #include <petscdraw.h> 1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1263 { 1264 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1265 PetscMPIInt rank = aij->rank, size = aij->size; 1266 PetscBool isdraw, iascii, isbinary; 1267 PetscViewer sviewer; 1268 PetscViewerFormat format; 1269 1270 PetscFunctionBegin; 1271 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1272 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1273 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1274 if (iascii) { 1275 PetscCall(PetscViewerGetFormat(viewer, &format)); 1276 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1277 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1278 PetscCall(PetscMalloc1(size, &nz)); 1279 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1280 for (i = 0; i < size; i++) { 1281 nmax = PetscMax(nmax, nz[i]); 1282 nmin = PetscMin(nmin, nz[i]); 1283 navg += nz[i]; 1284 } 1285 PetscCall(PetscFree(nz)); 1286 navg = navg / size; 1287 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" 
PetscInt_FMT "\n", nmin, navg, nmax)); 1288 PetscFunctionReturn(PETSC_SUCCESS); 1289 } 1290 PetscCall(PetscViewerGetFormat(viewer, &format)); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscInt *inodes = NULL; 1294 1295 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1296 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1297 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1298 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1299 if (!inodes) { 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1301 info.memory)); 1302 } else { 1303 PetscCall( 1304 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1305 } 1306 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1307 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1308 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1310 PetscCall(PetscViewerFlush(viewer)); 1311 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1312 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1313 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1314 PetscFunctionReturn(PETSC_SUCCESS); 1315 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1316 PetscInt inodecount, inodelimit, *inodes; 1317 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1318 if (inodes) { 1319 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1320 } else { 1321 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1322 } 1323 PetscFunctionReturn(PETSC_SUCCESS); 1324 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 } else if (isbinary) { 1328 if (size == 1) { 1329 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1330 PetscCall(MatView(aij->A, viewer)); 1331 } else { 1332 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1333 } 1334 PetscFunctionReturn(PETSC_SUCCESS); 1335 } else if (iascii && size == 1) { 1336 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1337 PetscCall(MatView(aij->A, viewer)); 1338 PetscFunctionReturn(PETSC_SUCCESS); 1339 } else if (isdraw) { 1340 PetscDraw draw; 1341 PetscBool isnull; 1342 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1343 PetscCall(PetscDrawIsNull(draw, &isnull)); 1344 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1345 } 1346 1347 { /* assemble the entire matrix onto first processor */ 1348 Mat A = NULL, Av; 1349 IS isrow, iscol; 1350 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1352 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1353 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1354 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1355 /* The commented code uses MatCreateSubMatrices instead */ 1356 /* 1357 Mat *AA, A = NULL, Av; 1358 IS isrow,iscol; 1359 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1361 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1362 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1363 if (rank == 0) { 1364 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1365 A = AA[0]; 1366 Av = AA[0]; 1367 } 1368 PetscCall(MatDestroySubMatrices(1,&AA)); 1369 */ 1370 PetscCall(ISDestroy(&iscol)); 1371 PetscCall(ISDestroy(&isrow)); 1372 /* 1373 Everyone has to call to draw the matrix since the graphics waits are 1374 synchronized across all processors that share the PetscDraw object 1375 */ 1376 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1377 if (rank == 0) { 1378 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1379 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1380 } 1381 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1382 PetscCall(MatDestroy(&A)); 1383 } 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1388 { 1389 PetscBool iascii, isdraw, issocket, isbinary; 1390 1391 PetscFunctionBegin; 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1395 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1396 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1397 PetscFunctionReturn(PETSC_SUCCESS); 1398 } 1399 1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1401 { 1402 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1403 Vec bb1 = NULL; 1404 PetscBool hasop; 1405 1406 PetscFunctionBegin; 1407 if (flag == SOR_APPLY_UPPER) { 1408 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1409 PetscFunctionReturn(PETSC_SUCCESS); 1410 } 1411 1412 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1413 1414 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 1420 while (its--) { 1421 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1423 1424 /* update rhs: bb1 = bb - B*x */ 1425 PetscCall(VecScale(mat->lvec, -1.0)); 1426 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1427 1428 /* local sweep */ 1429 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1430 } 1431 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1432 if (flag & SOR_ZERO_INITIAL_GUESS) { 1433 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1434 its--; 1435 } 1436 while (its--) { 1437 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1439 1440 /* update rhs: bb1 = bb - B*x */ 1441 PetscCall(VecScale(mat->lvec, -1.0)); 1442 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1443 1444 /* local sweep */ 1445 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1446 } 1447 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1448 if (flag & SOR_ZERO_INITIAL_GUESS) { 1449 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1450 its--; 1451 } 1452 while (its--) { 1453 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1455 1456 /* update rhs: bb1 = bb - B*x */ 1457 PetscCall(VecScale(mat->lvec, -1.0)); 1458 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1459 1460 /* local sweep */ 1461 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1462 } 1463 } else if (flag & SOR_EISENSTAT) { 1464 Vec xx1; 1465 1466 PetscCall(VecDuplicate(bb, &xx1)); 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1468 1469 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1470 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1471 if (!mat->diag) { 1472 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1473 PetscCall(MatGetDiagonal(matin, mat->diag)); 1474 } 1475 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1476 if (hasop) { 1477 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1478 } else { 1479 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1480 } 1481 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1482 1483 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1484 1485 /* local sweep */ 1486 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1487 PetscCall(VecAXPY(xx, 1.0, xx1)); 1488 PetscCall(VecDestroy(&xx1)); 1489 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1490 1491 PetscCall(VecDestroy(&bb1)); 1492 1493 matin->factorerrortype = mat->A->factorerrortype; 1494 PetscFunctionReturn(PETSC_SUCCESS); 1495 } 1496 1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1498 { 1499 Mat aA, aB, Aperm; 1500 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1501 PetscScalar *aa, *ba; 1502 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1503 PetscSF rowsf, sf; 1504 IS parcolp = NULL; 1505 PetscBool done; 1506 1507 PetscFunctionBegin; 1508 PetscCall(MatGetLocalSize(A, &m, &n)); 1509 PetscCall(ISGetIndices(rowp, &rwant)); 1510 PetscCall(ISGetIndices(colp, &cwant)); 1511 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1512 1513 /* Invert row permutation to find out where my rows should go */ 1514 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1515 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1516 PetscCall(PetscSFSetFromOptions(rowsf)); 1517 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1518 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1519 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1520 1521 /* Invert column permutation to find out where my columns should go */ 1522 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1523 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1524 PetscCall(PetscSFSetFromOptions(sf)); 1525 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1526 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1527 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1528 PetscCall(PetscSFDestroy(&sf)); 1529 1530 PetscCall(ISRestoreIndices(rowp, &rwant)); 1531 PetscCall(ISRestoreIndices(colp, &cwant)); 1532 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1533 1534 /* Find out where my gcols should go */ 1535 PetscCall(MatGetSize(aB, NULL, &ng)); 1536 PetscCall(PetscMalloc1(ng, &gcdest)); 1537 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1538 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1539 PetscCall(PetscSFSetFromOptions(sf)); 1540 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1541 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1542 PetscCall(PetscSFDestroy(&sf)); 1543 1544 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1545 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1546 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1547 for (i = 0; i < m; i++) { 1548 PetscInt row = rdest[i]; 1549 PetscMPIInt rowner; 1550 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1551 for (j = ai[i]; j < ai[i + 1]; j++) { 1552 PetscInt col = cdest[aj[j]]; 1553 PetscMPIInt cowner; 1554 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1555 if (rowner == cowner) dnnz[i]++; 1556 else onnz[i]++; 1557 } 1558 for (j = bi[i]; j < bi[i + 1]; j++) { 1559 PetscInt col = gcdest[bj[j]]; 1560 PetscMPIInt cowner; 1561 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1562 if (rowner == cowner) dnnz[i]++; 1563 else onnz[i]++; 1564 } 1565 } 1566 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1568 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1569 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1570 PetscCall(PetscSFDestroy(&rowsf)); 1571 1572 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1573 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1574 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1575 for (i = 0; i < m; i++) { 1576 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1577 PetscInt j0, rowlen; 1578 rowlen = ai[i + 1] - ai[i]; 1579 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1580 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1581 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1582 } 1583 rowlen = bi[i + 1] - bi[i]; 1584 for (j0 = j = 0; j < rowlen; j0 = j) { 1585 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1586 
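/* descriptive note (added): bcols now holds the permuted global column numbers for this batch of off-diagonal entries, and rdest[i] is the permuted global row, so the values ba + bi[i] + j0 can be inserted directly */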
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1587 } 1588 } 1589 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1591 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1592 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1593 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1594 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1595 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1596 PetscCall(PetscFree3(work, rdest, cdest)); 1597 PetscCall(PetscFree(gcdest)); 1598 if (parcolp) PetscCall(ISDestroy(&colp)); 1599 *B = Aperm; 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1604 { 1605 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1606 1607 PetscFunctionBegin; 1608 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1609 if (ghosts) *ghosts = aij->garray; 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1614 { 1615 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1616 Mat A = mat->A, B = mat->B; 1617 PetscLogDouble isend[5], irecv[5]; 1618 1619 PetscFunctionBegin; 1620 info->block_size = 1.0; 1621 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1622 1623 isend[0] = info->nz_used; 1624 isend[1] = info->nz_allocated; 1625 isend[2] = info->nz_unneeded; 1626 isend[3] = info->memory; 1627 isend[4] = info->mallocs; 1628 1629 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1630 1631 isend[0] += info->nz_used; 1632 isend[1] += info->nz_allocated; 1633 isend[2] += info->nz_unneeded; 1634 isend[3] += info->memory; 1635 isend[4] += info->mallocs; 1636 if (flag == MAT_LOCAL) { 1637 info->nz_used = isend[0]; 1638 info->nz_allocated = isend[1]; 1639 info->nz_unneeded = isend[2]; 1640 info->memory = isend[3]; 1641 info->mallocs = isend[4]; 1642 } else if (flag == MAT_GLOBAL_MAX) { 1643 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1644 1645 info->nz_used = irecv[0]; 1646 info->nz_allocated = irecv[1]; 1647 info->nz_unneeded = irecv[2]; 1648 info->memory = irecv[3]; 1649 info->mallocs = irecv[4]; 1650 } else if (flag == MAT_GLOBAL_SUM) { 1651 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1652 1653 info->nz_used = irecv[0]; 1654 info->nz_allocated = irecv[1]; 1655 info->nz_unneeded = irecv[2]; 1656 info->memory = irecv[3]; 1657 info->mallocs = irecv[4]; 1658 } 1659 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1660 info->fill_ratio_needed = 0; 1661 info->factor_mallocs = 0; 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1666 { 1667 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1668 1669 PetscFunctionBegin; 1670 switch (op) { 1671 case MAT_NEW_NONZERO_LOCATIONS: 1672 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1673 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1674 case MAT_KEEP_NONZERO_PATTERN: 1675 case MAT_NEW_NONZERO_LOCATION_ERR: 1676 case MAT_USE_INODES: 1677 case MAT_IGNORE_ZERO_ENTRIES: 1678 case MAT_FORM_EXPLICIT_TRANSPOSE: 1679 MatCheckPreallocated(A, 1); 1680 PetscCall(MatSetOption(a->A, op, flg)); 1681 PetscCall(MatSetOption(a->B, op, flg)); 1682 break; 1683 case MAT_ROW_ORIENTED: 1684 MatCheckPreallocated(A, 1); 1685 a->roworiented = flg; 1686 1687 
PetscCall(MatSetOption(a->A, op, flg)); 1688 PetscCall(MatSetOption(a->B, op, flg)); 1689 break; 1690 case MAT_IGNORE_OFF_PROC_ENTRIES: 1691 a->donotstash = flg; 1692 break; 1693 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1694 case MAT_SPD: 1695 case MAT_SYMMETRIC: 1696 case MAT_STRUCTURALLY_SYMMETRIC: 1697 case MAT_HERMITIAN: 1698 case MAT_SYMMETRY_ETERNAL: 1699 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1700 case MAT_SPD_ETERNAL: 1701 /* if the diagonal matrix is square it inherits some of the properties above */ 1702 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1703 break; 1704 case MAT_SUBMAT_SINGLEIS: 1705 A->submat_singleis = flg; 1706 break; 1707 default: 1708 break; 1709 } 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1714 { 1715 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1716 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1717 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1718 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1719 PetscInt *cmap, *idx_p; 1720 1721 PetscFunctionBegin; 1722 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1723 mat->getrowactive = PETSC_TRUE; 1724 1725 if (!mat->rowvalues && (idx || v)) { 1726 /* 1727 allocate enough space to hold information from the longest row. 1728 */ 1729 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1730 PetscInt max = 1, tmp; 1731 for (i = 0; i < matin->rmap->n; i++) { 1732 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1733 if (max < tmp) max = tmp; 1734 } 1735 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1736 } 1737 1738 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1739 lrow = row - rstart; 1740 1741 pvA = &vworkA; 1742 pcA = &cworkA; 1743 pvB = &vworkB; 1744 pcB = &cworkB; 1745 if (!v) { 1746 pvA = NULL; 1747 pvB = NULL; 1748 } 1749 if (!idx) { 1750 pcA = NULL; 1751 if (!v) pcB = NULL; 1752 } 1753 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1754 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1755 nztot = nzA + nzB; 1756 1757 cmap = mat->garray; 1758 if (v || idx) { 1759 if (nztot) { 1760 /* Sort by increasing column numbers, assuming A and B already sorted */ 1761 PetscInt imark = -1; 1762 if (v) { 1763 *v = v_p = mat->rowvalues; 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1766 else break; 1767 } 1768 imark = i; 1769 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1770 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1771 } 1772 if (idx) { 1773 *idx = idx_p = mat->rowindices; 1774 if (imark > -1) { 1775 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1776 } else { 1777 for (i = 0; i < nzB; i++) { 1778 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1779 else break; 1780 } 1781 imark = i; 1782 } 1783 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1784 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1785 } 1786 } else { 1787 if (idx) *idx = NULL; 1788 if (v) *v = NULL; 1789 } 1790 } 1791 *nz = nztot; 1792 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1793 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1794 
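/* descriptive note (added): the row assembled above lists the off-diagonal entries with global column < cstart first, then the diagonal-block entries, then the remaining off-diagonal entries, so the returned column indices are in increasing global order */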
PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 1801 PetscFunctionBegin; 1802 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1803 aij->getrowactive = PETSC_FALSE; 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1808 { 1809 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1810 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1811 PetscInt i, j, cstart = mat->cmap->rstart; 1812 PetscReal sum = 0.0; 1813 const MatScalar *v, *amata, *bmata; 1814 1815 PetscFunctionBegin; 1816 if (aij->size == 1) { 1817 PetscCall(MatNorm(aij->A, type, norm)); 1818 } else { 1819 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1820 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1821 if (type == NORM_FROBENIUS) { 1822 v = amata; 1823 for (i = 0; i < amat->nz; i++) { 1824 sum += PetscRealPart(PetscConj(*v) * (*v)); 1825 v++; 1826 } 1827 v = bmata; 1828 for (i = 0; i < bmat->nz; i++) { 1829 sum += PetscRealPart(PetscConj(*v) * (*v)); 1830 v++; 1831 } 1832 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1833 *norm = PetscSqrtReal(*norm); 1834 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1835 } else if (type == NORM_1) { /* max column norm */ 1836 PetscReal *tmp; 1837 PetscInt *jj, *garray = aij->garray; 1838 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1839 *norm = 0.0; 1840 v = amata; 1841 jj = amat->j; 1842 for (j = 0; j < amat->nz; j++) { 1843 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1844 v++; 1845 } 1846 v = bmata; 1847 jj = bmat->j; 1848 for (j = 0; j < bmat->nz; j++) { 1849 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1850 v++; 1851 } 1852 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1853 for (j = 0; j < mat->cmap->N; j++) { 1854 if (tmp[j] > *norm) *norm = tmp[j]; 1855 } 1856 PetscCall(PetscFree(tmp)); 1857 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1858 } else if (type == NORM_INFINITY) { /* max row norm */ 1859 PetscReal ntemp = 0.0; 1860 for (j = 0; j < aij->A->rmap->n; j++) { 1861 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1862 sum = 0.0; 1863 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1864 sum += PetscAbsScalar(*v); 1865 v++; 1866 } 1867 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1868 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1869 sum += PetscAbsScalar(*v); 1870 v++; 1871 } 1872 if (sum > ntemp) ntemp = sum; 1873 } 1874 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1875 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1876 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1877 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1878 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1879 } 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1884 { 1885 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1886 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1887 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, 
*cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1888 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1889 Mat B, A_diag, *B_diag; 1890 const MatScalar *pbv, *bv; 1891 1892 PetscFunctionBegin; 1893 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1894 ma = A->rmap->n; 1895 na = A->cmap->n; 1896 mb = a->B->rmap->n; 1897 nb = a->B->cmap->n; 1898 ai = Aloc->i; 1899 aj = Aloc->j; 1900 bi = Bloc->i; 1901 bj = Bloc->j; 1902 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1903 PetscInt *d_nnz, *g_nnz, *o_nnz; 1904 PetscSFNode *oloc; 1905 PETSC_UNUSED PetscSF sf; 1906 1907 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1908 /* compute d_nnz for preallocation */ 1909 PetscCall(PetscArrayzero(d_nnz, na)); 1910 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1911 /* compute local off-diagonal contributions */ 1912 PetscCall(PetscArrayzero(g_nnz, nb)); 1913 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1914 /* map those to global */ 1915 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1916 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1917 PetscCall(PetscSFSetFromOptions(sf)); 1918 PetscCall(PetscArrayzero(o_nnz, na)); 1919 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1920 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1921 PetscCall(PetscSFDestroy(&sf)); 1922 1923 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1924 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1925 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1926 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1927 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1928 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1929 } else { 1930 B = *matout; 1931 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1932 } 1933 1934 b = (Mat_MPIAIJ *)B->data; 1935 A_diag = a->A; 1936 B_diag = &b->A; 1937 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1938 A_diag_ncol = A_diag->cmap->N; 1939 B_diag_ilen = sub_B_diag->ilen; 1940 B_diag_i = sub_B_diag->i; 1941 1942 /* Set ilen for diagonal of B */ 1943 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1944 1945 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1946 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1947 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1948 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1949 1950 /* copy over the B part */ 1951 PetscCall(PetscMalloc1(bi[mb], &cols)); 1952 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1953 pbv = bv; 1954 row = A->rmap->rstart; 1955 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1956 cols_tmp = cols; 1957 for (i = 0; i < mb; i++) { 1958 ncol = bi[i + 1] - bi[i]; 1959 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1960 row++; 1961 if (pbv) pbv += ncol; 1962 if (cols_tmp) cols_tmp += ncol; 1963 } 1964 PetscCall(PetscFree(cols)); 1965 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1966 1967 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1968 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1969 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1970 *matout = B; 1971 } else { 1972 PetscCall(MatHeaderMerge(A, &B)); 1973 } 1974 PetscFunctionReturn(PETSC_SUCCESS); 1975 } 1976 1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1978 { 1979 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1980 Mat a = aij->A, b = aij->B; 1981 PetscInt s1, s2, s3; 1982 1983 PetscFunctionBegin; 1984 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1985 if (rr) { 1986 PetscCall(VecGetLocalSize(rr, &s1)); 1987 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1988 /* Overlap communication with computation. */ 1989 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1990 } 1991 if (ll) { 1992 PetscCall(VecGetLocalSize(ll, &s1)); 1993 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1994 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1995 } 1996 /* scale the diagonal block */ 1997 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1998 1999 if (rr) { 2000 /* Do a scatter end and then right scale the off-diagonal block */ 2001 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2002 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2003 } 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2010 2011 PetscFunctionBegin; 2012 PetscCall(MatSetUnfactored(a->A)); 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2017 { 2018 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2019 Mat a, b, c, d; 2020 PetscBool flg; 2021 2022 PetscFunctionBegin; 2023 a = matA->A; 2024 b = matA->B; 2025 c = matB->A; 2026 d = matB->B; 2027 2028 PetscCall(MatEqual(a, c, &flg)); 2029 if (flg) PetscCall(MatEqual(b, d, &flg)); 2030 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2035 { 2036 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2037 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2038 2039 PetscFunctionBegin; 2040 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2041 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2042 /* because of the column compression in the off-processor part of the matrix a->B, 2043 the number of columns in a->B and b->B may be different, hence we cannot call 2044 the MatCopy() directly on the two parts. If need be, we can provide a more 2045 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2046 then copying the submatrices */ 2047 PetscCall(MatCopy_Basic(A, B, str)); 2048 } else { 2049 PetscCall(MatCopy(a->A, b->A, str)); 2050 PetscCall(MatCopy(a->B, b->B, str)); 2051 } 2052 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2053 PetscFunctionReturn(PETSC_SUCCESS); 2054 } 2055 2056 /* 2057 Computes the number of nonzeros per row needed for preallocation when X and Y 2058 have different nonzero structure. 2059 */ 2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2061 { 2062 PetscInt i, j, k, nzx, nzy; 2063 2064 PetscFunctionBegin; 2065 /* Set the number of nonzeros in the new matrix */ 2066 for (i = 0; i < m; i++) { 2067 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2068 nzx = xi[i + 1] - xi[i]; 2069 nzy = yi[i + 1] - yi[i]; 2070 nnz[i] = 0; 2071 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2072 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2073 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2074 nnz[i]++; 2075 } 2076 for (; k < nzy; k++) nnz[i]++; 2077 } 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2083 { 2084 PetscInt m = Y->rmap->N; 2085 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2086 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2087 2088 PetscFunctionBegin; 2089 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2094 { 2095 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2096 2097 PetscFunctionBegin; 2098 if (str == SAME_NONZERO_PATTERN) { 2099 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2100 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2101 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2102 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2103 } else { 2104 Mat B; 2105 PetscInt *nnz_d, *nnz_o; 2106 2107 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2108 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2109 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2110 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2111 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2112 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2113 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2114 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2115 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2116 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2117 PetscCall(MatHeaderMerge(Y, &B)); 2118 PetscCall(PetscFree(nnz_d)); 
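/* descriptive note (added): MatHeaderMerge() above replaced Y's internal data with B's and destroyed the B wrapper, so only the temporary preallocation counts nnz_d and nnz_o remain to be freed */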
2119 PetscCall(PetscFree(nnz_o)); 2120 } 2121 PetscFunctionReturn(PETSC_SUCCESS); 2122 } 2123 2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2125 2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2127 { 2128 PetscFunctionBegin; 2129 if (PetscDefined(USE_COMPLEX)) { 2130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2131 2132 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2133 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2134 } 2135 PetscFunctionReturn(PETSC_SUCCESS); 2136 } 2137 2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2139 { 2140 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2141 2142 PetscFunctionBegin; 2143 PetscCall(MatRealPart(a->A)); 2144 PetscCall(MatRealPart(a->B)); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2149 { 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2151 2152 PetscFunctionBegin; 2153 PetscCall(MatImaginaryPart(a->A)); 2154 PetscCall(MatImaginaryPart(a->B)); 2155 PetscFunctionReturn(PETSC_SUCCESS); 2156 } 2157 2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2159 { 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2161 PetscInt i, *idxb = NULL, m = A->rmap->n; 2162 PetscScalar *vv; 2163 Vec vB, vA; 2164 const PetscScalar *va, *vb; 2165 2166 PetscFunctionBegin; 2167 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2168 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2169 2170 PetscCall(VecGetArrayRead(vA, &va)); 2171 if (idx) { 2172 for (i = 0; i < m; i++) { 2173 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2174 } 2175 } 2176 2177 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2178 PetscCall(PetscMalloc1(m, &idxb)); 2179 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2180 2181 PetscCall(VecGetArrayWrite(v, &vv)); 2182 PetscCall(VecGetArrayRead(vB, &vb)); 2183 for (i = 0; i < m; i++) { 2184 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2185 vv[i] = vb[i]; 2186 if (idx) idx[i] = a->garray[idxb[i]]; 2187 } else { 2188 vv[i] = va[i]; 2189 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2190 } 2191 } 2192 PetscCall(VecRestoreArrayWrite(v, &vv)); 2193 PetscCall(VecRestoreArrayRead(vA, &va)); 2194 PetscCall(VecRestoreArrayRead(vB, &vb)); 2195 PetscCall(PetscFree(idxb)); 2196 PetscCall(VecDestroy(&vA)); 2197 PetscCall(VecDestroy(&vB)); 2198 PetscFunctionReturn(PETSC_SUCCESS); 2199 } 2200 2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2204 Vec vB, vA; 2205 2206 PetscFunctionBegin; 2207 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2208 PetscCall(MatGetRowSumAbs(a->A, vA)); 2209 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2210 PetscCall(MatGetRowSumAbs(a->B, vB)); 2211 PetscCall(VecAXPY(vA, 1.0, vB)); 2212 PetscCall(VecDestroy(&vB)); 2213 PetscCall(VecCopy(vA, v)); 2214 PetscCall(VecDestroy(&vA)); 2215 PetscFunctionReturn(PETSC_SUCCESS); 2216 } 2217 2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2219 { 2220 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2221 PetscInt m = A->rmap->n, n = A->cmap->n; 2222 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2223 PetscInt *cmap = mat->garray; 2224 PetscInt *diagIdx, *offdiagIdx; 2225 Vec diagV, offdiagV; 2226 PetscScalar *a, *diagA, *offdiagA; 2227 const PetscScalar *ba, *bav; 2228 PetscInt r, j, col, ncols, *bi, *bj; 2229 Mat B = mat->B; 2230 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2231 2232 PetscFunctionBegin; 2233 /* When a process holds entire A and other 
processes have no entry */ 2234 if (A->cmap->N == n) { 2235 PetscCall(VecGetArrayWrite(v, &diagA)); 2236 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2237 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2238 PetscCall(VecDestroy(&diagV)); 2239 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } else if (n == 0) { 2242 if (m) { 2243 PetscCall(VecGetArrayWrite(v, &a)); 2244 for (r = 0; r < m; r++) { 2245 a[r] = 0.0; 2246 if (idx) idx[r] = -1; 2247 } 2248 PetscCall(VecRestoreArrayWrite(v, &a)); 2249 } 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } 2252 2253 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2255 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2256 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2257 2258 /* Get offdiagIdx[] for implicit 0.0 */ 2259 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2260 ba = bav; 2261 bi = b->i; 2262 bj = b->j; 2263 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2264 for (r = 0; r < m; r++) { 2265 ncols = bi[r + 1] - bi[r]; 2266 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2267 offdiagA[r] = *ba; 2268 offdiagIdx[r] = cmap[0]; 2269 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2270 offdiagA[r] = 0.0; 2271 2272 /* Find first hole in the cmap */ 2273 for (j = 0; j < ncols; j++) { 2274 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2275 if (col > j && j < cstart) { 2276 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2277 break; 2278 } else if (col > j + n && j >= cstart) { 2279 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2280 break; 2281 } 2282 } 2283 if (j == ncols && ncols < A->cmap->N - n) { 2284 /* a hole is outside compressed Bcols */ 2285 if (ncols == 0) { 2286 if (cstart) { 2287 offdiagIdx[r] = 0; 2288 } else offdiagIdx[r] = cend; 2289 } else { /* ncols > 0 */ 2290 offdiagIdx[r] = cmap[ncols - 1] + 1; 2291 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2292 } 2293 } 2294 } 2295 2296 for (j = 0; j < ncols; j++) { 2297 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2298 offdiagA[r] = *ba; 2299 offdiagIdx[r] = cmap[*bj]; 2300 } 2301 ba++; 2302 bj++; 2303 } 2304 } 2305 2306 PetscCall(VecGetArrayWrite(v, &a)); 2307 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2308 for (r = 0; r < m; ++r) { 2309 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2310 a[r] = diagA[r]; 2311 if (idx) idx[r] = cstart + diagIdx[r]; 2312 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2313 a[r] = diagA[r]; 2314 if (idx) { 2315 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2316 idx[r] = cstart + diagIdx[r]; 2317 } else idx[r] = offdiagIdx[r]; 2318 } 2319 } else { 2320 a[r] = offdiagA[r]; 2321 if (idx) idx[r] = offdiagIdx[r]; 2322 } 2323 } 2324 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2325 PetscCall(VecRestoreArrayWrite(v, &a)); 2326 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2327 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2328 PetscCall(VecDestroy(&diagV)); 2329 PetscCall(VecDestroy(&offdiagV)); 2330 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2331 PetscFunctionReturn(PETSC_SUCCESS); 2332 } 2333 2334 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2335 { 2336 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2337 PetscInt m = A->rmap->n, n = A->cmap->n; 2338 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 
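/* descriptive note (added): garray[] translates the compressed local column indices of the off-diagonal block B into global column numbers */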
2339 PetscInt *cmap = mat->garray; 2340 PetscInt *diagIdx, *offdiagIdx; 2341 Vec diagV, offdiagV; 2342 PetscScalar *a, *diagA, *offdiagA; 2343 const PetscScalar *ba, *bav; 2344 PetscInt r, j, col, ncols, *bi, *bj; 2345 Mat B = mat->B; 2346 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2347 2348 PetscFunctionBegin; 2349 /* When a process holds entire A and other processes have no entry */ 2350 if (A->cmap->N == n) { 2351 PetscCall(VecGetArrayWrite(v, &diagA)); 2352 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2353 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2354 PetscCall(VecDestroy(&diagV)); 2355 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2356 PetscFunctionReturn(PETSC_SUCCESS); 2357 } else if (n == 0) { 2358 if (m) { 2359 PetscCall(VecGetArrayWrite(v, &a)); 2360 for (r = 0; r < m; r++) { 2361 a[r] = PETSC_MAX_REAL; 2362 if (idx) idx[r] = -1; 2363 } 2364 PetscCall(VecRestoreArrayWrite(v, &a)); 2365 } 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } 2368 2369 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2370 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2371 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2372 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2373 2374 /* Get offdiagIdx[] for implicit 0.0 */ 2375 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2376 ba = bav; 2377 bi = b->i; 2378 bj = b->j; 2379 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2380 for (r = 0; r < m; r++) { 2381 ncols = bi[r + 1] - bi[r]; 2382 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2383 offdiagA[r] = *ba; 2384 offdiagIdx[r] = cmap[0]; 2385 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2386 offdiagA[r] = 0.0; 2387 2388 /* Find first hole in the cmap */ 2389 for (j = 0; j < ncols; j++) { 2390 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2391 if (col > j && j < cstart) { 2392 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2393 break; 2394 } else if (col > j + n && j >= cstart) { 2395 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2396 break; 2397 } 2398 } 2399 if (j == ncols && ncols < A->cmap->N - n) { 2400 /* a hole is outside compressed Bcols */ 2401 if (ncols == 0) { 2402 if (cstart) { 2403 offdiagIdx[r] = 0; 2404 } else offdiagIdx[r] = cend; 2405 } else { /* ncols > 0 */ 2406 offdiagIdx[r] = cmap[ncols - 1] + 1; 2407 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2408 } 2409 } 2410 } 2411 2412 for (j = 0; j < ncols; j++) { 2413 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2414 offdiagA[r] = *ba; 2415 offdiagIdx[r] = cmap[*bj]; 2416 } 2417 ba++; 2418 bj++; 2419 } 2420 } 2421 2422 PetscCall(VecGetArrayWrite(v, &a)); 2423 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2424 for (r = 0; r < m; ++r) { 2425 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2426 a[r] = diagA[r]; 2427 if (idx) idx[r] = cstart + diagIdx[r]; 2428 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2429 a[r] = diagA[r]; 2430 if (idx) { 2431 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2432 idx[r] = cstart + diagIdx[r]; 2433 } else idx[r] = offdiagIdx[r]; 2434 } 2435 } else { 2436 a[r] = offdiagA[r]; 2437 if (idx) idx[r] = offdiagIdx[r]; 2438 } 2439 } 2440 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2441 PetscCall(VecRestoreArrayWrite(v, &a)); 2442 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2443 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2444 PetscCall(VecDestroy(&diagV)); 2445 
PetscCall(VecDestroy(&offdiagV)); 2446 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2447 PetscFunctionReturn(PETSC_SUCCESS); 2448 } 2449 2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2451 { 2452 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2453 PetscInt m = A->rmap->n, n = A->cmap->n; 2454 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2455 PetscInt *cmap = mat->garray; 2456 PetscInt *diagIdx, *offdiagIdx; 2457 Vec diagV, offdiagV; 2458 PetscScalar *a, *diagA, *offdiagA; 2459 const PetscScalar *ba, *bav; 2460 PetscInt r, j, col, ncols, *bi, *bj; 2461 Mat B = mat->B; 2462 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2463 2464 PetscFunctionBegin; 2465 /* When a process holds entire A and other processes have no entry */ 2466 if (A->cmap->N == n) { 2467 PetscCall(VecGetArrayWrite(v, &diagA)); 2468 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2469 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2470 PetscCall(VecDestroy(&diagV)); 2471 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2472 PetscFunctionReturn(PETSC_SUCCESS); 2473 } else if (n == 0) { 2474 if (m) { 2475 PetscCall(VecGetArrayWrite(v, &a)); 2476 for (r = 0; r < m; r++) { 2477 a[r] = PETSC_MIN_REAL; 2478 if (idx) idx[r] = -1; 2479 } 2480 PetscCall(VecRestoreArrayWrite(v, &a)); 2481 } 2482 PetscFunctionReturn(PETSC_SUCCESS); 2483 } 2484 2485 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2486 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2487 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2488 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2489 2490 /* Get offdiagIdx[] for implicit 0.0 */ 2491 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2492 ba = bav; 2493 bi = b->i; 2494 bj = b->j; 2495 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2496 for (r = 0; r < m; r++) { 2497 ncols = bi[r + 1] - bi[r]; 2498 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2499 offdiagA[r] = *ba; 2500 offdiagIdx[r] = cmap[0]; 2501 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2502 offdiagA[r] = 0.0; 2503 2504 /* Find first hole in the cmap */ 2505 for (j = 0; j < ncols; j++) { 2506 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2507 if (col > j && j < cstart) { 2508 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2509 break; 2510 } else if (col > j + n && j >= cstart) { 2511 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2512 break; 2513 } 2514 } 2515 if (j == ncols && ncols < A->cmap->N - n) { 2516 /* a hole is outside compressed Bcols */ 2517 if (ncols == 0) { 2518 if (cstart) { 2519 offdiagIdx[r] = 0; 2520 } else offdiagIdx[r] = cend; 2521 } else { /* ncols > 0 */ 2522 offdiagIdx[r] = cmap[ncols - 1] + 1; 2523 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2524 } 2525 } 2526 } 2527 2528 for (j = 0; j < ncols; j++) { 2529 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2530 offdiagA[r] = *ba; 2531 offdiagIdx[r] = cmap[*bj]; 2532 } 2533 ba++; 2534 bj++; 2535 } 2536 } 2537 2538 PetscCall(VecGetArrayWrite(v, &a)); 2539 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2540 for (r = 0; r < m; ++r) { 2541 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2542 a[r] = diagA[r]; 2543 if (idx) idx[r] = cstart + diagIdx[r]; 2544 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2545 a[r] = diagA[r]; 2546 if (idx) { 2547 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2548 idx[r] = cstart + diagIdx[r]; 2549 } else idx[r] = offdiagIdx[r]; 2550 } 2551 } 
else { 2552 a[r] = offdiagA[r]; 2553 if (idx) idx[r] = offdiagIdx[r]; 2554 } 2555 } 2556 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2557 PetscCall(VecRestoreArrayWrite(v, &a)); 2558 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2559 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2560 PetscCall(VecDestroy(&diagV)); 2561 PetscCall(VecDestroy(&offdiagV)); 2562 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2563 PetscFunctionReturn(PETSC_SUCCESS); 2564 } 2565 2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2567 { 2568 Mat *dummy; 2569 2570 PetscFunctionBegin; 2571 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2572 *newmat = *dummy; 2573 PetscCall(PetscFree(dummy)); 2574 PetscFunctionReturn(PETSC_SUCCESS); 2575 } 2576 2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2578 { 2579 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2580 2581 PetscFunctionBegin; 2582 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2583 A->factorerrortype = a->A->factorerrortype; 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2588 { 2589 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2590 2591 PetscFunctionBegin; 2592 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2593 PetscCall(MatSetRandom(aij->A, rctx)); 2594 if (x->assembled) { 2595 PetscCall(MatSetRandom(aij->B, rctx)); 2596 } else { 2597 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2598 } 2599 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2600 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2601 PetscFunctionReturn(PETSC_SUCCESS); 2602 } 2603 2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2605 { 2606 PetscFunctionBegin; 2607 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2608 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2609 PetscFunctionReturn(PETSC_SUCCESS); 2610 } 2611 2612 /*@ 2613 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2614 2615 Not Collective 2616 2617 Input Parameter: 2618 . A - the matrix 2619 2620 Output Parameter: 2621 . 
nz - the number of nonzeros 2622 2623 Level: advanced 2624 2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2626 @*/ 2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2628 { 2629 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2630 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2631 PetscBool isaij; 2632 2633 PetscFunctionBegin; 2634 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2635 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2636 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2637 PetscFunctionReturn(PETSC_SUCCESS); 2638 } 2639 2640 /*@ 2641 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2642 2643 Collective 2644 2645 Input Parameters: 2646 + A - the matrix 2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2648 2649 Level: advanced 2650 2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2652 @*/ 2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2654 { 2655 PetscFunctionBegin; 2656 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2657 PetscFunctionReturn(PETSC_SUCCESS); 2658 } 2659 2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2661 { 2662 PetscBool sc = PETSC_FALSE, flg; 2663 2664 PetscFunctionBegin; 2665 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2666 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2667 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2668 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2669 PetscOptionsHeadEnd(); 2670 PetscFunctionReturn(PETSC_SUCCESS); 2671 } 2672 2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2674 { 2675 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2676 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2677 2678 PetscFunctionBegin; 2679 if (!Y->preallocated) { 2680 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2681 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2682 PetscInt nonew = aij->nonew; 2683 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2684 aij->nonew = nonew; 2685 } 2686 PetscCall(MatShift_Basic(Y, a)); 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2691 { 2692 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2693 2694 PetscFunctionBegin; 2695 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2696 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2697 if (d) { 2698 PetscInt rstart; 2699 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2700 *d += rstart; 2701 } 2702 PetscFunctionReturn(PETSC_SUCCESS); 2703 } 2704 2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2708 2709 PetscFunctionBegin; 2710 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2715 { 2716 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2717 2718 PetscFunctionBegin; 2719 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2720 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2725 MatGetRow_MPIAIJ, 2726 MatRestoreRow_MPIAIJ, 2727 MatMult_MPIAIJ, 2728 /* 4*/ MatMultAdd_MPIAIJ, 2729 MatMultTranspose_MPIAIJ, 2730 MatMultTransposeAdd_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*10*/ NULL, 2735 NULL, 2736 NULL, 2737 MatSOR_MPIAIJ, 2738 MatTranspose_MPIAIJ, 2739 /*15*/ MatGetInfo_MPIAIJ, 2740 MatEqual_MPIAIJ, 2741 MatGetDiagonal_MPIAIJ, 2742 MatDiagonalScale_MPIAIJ, 2743 MatNorm_MPIAIJ, 2744 /*20*/ MatAssemblyBegin_MPIAIJ, 2745 MatAssemblyEnd_MPIAIJ, 2746 MatSetOption_MPIAIJ, 2747 MatZeroEntries_MPIAIJ, 2748 /*24*/ MatZeroRows_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 NULL, 2753 /*29*/ MatSetUp_MPI_Hash, 2754 NULL, 2755 NULL, 2756 MatGetDiagonalBlock_MPIAIJ, 2757 NULL, 2758 /*34*/ MatDuplicate_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 /*39*/ MatAXPY_MPIAIJ, 2764 MatCreateSubMatrices_MPIAIJ, 2765 MatIncreaseOverlap_MPIAIJ, 2766 MatGetValues_MPIAIJ, 2767 MatCopy_MPIAIJ, 2768 /*44*/ MatGetRowMax_MPIAIJ, 2769 MatScale_MPIAIJ, 2770 MatShift_MPIAIJ, 2771 MatDiagonalSet_MPIAIJ, 2772 MatZeroRowsColumns_MPIAIJ, 2773 /*49*/ MatSetRandom_MPIAIJ, 2774 MatGetRowIJ_MPIAIJ, 2775 MatRestoreRowIJ_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*54*/ MatFDColoringCreate_MPIXAIJ, 2779 NULL, 2780 MatSetUnfactored_MPIAIJ, 2781 MatPermute_MPIAIJ, 2782 NULL, 2783 /*59*/ MatCreateSubMatrix_MPIAIJ, 2784 MatDestroy_MPIAIJ, 2785 MatView_MPIAIJ, 2786 NULL, 2787 NULL, 2788 /*64*/ NULL, 2789 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2790 NULL, 2791 NULL, 2792 NULL, 2793 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2794 MatGetRowMinAbs_MPIAIJ, 2795 NULL, 2796 NULL, 2797 NULL, 2798 NULL, 2799 /*75*/ MatFDColoringApply_AIJ, 2800 MatSetFromOptions_MPIAIJ, 2801 NULL, 2802 NULL, 2803 MatFindZeroDiagonals_MPIAIJ, 2804 /*80*/ NULL, 2805 NULL, 2806 NULL, 2807 /*83*/ MatLoad_MPIAIJ, 2808 NULL, 2809 NULL, 2810 NULL, 2811 NULL, 2812 NULL, 2813 /*89*/ NULL, 2814 NULL, 2815 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2816 NULL, 2817 NULL, 2818 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2819 NULL, 2820 NULL, 2821 
NULL, 2822 MatBindToCPU_MPIAIJ, 2823 /*99*/ MatProductSetFromOptions_MPIAIJ, 2824 NULL, 2825 NULL, 2826 MatConjugate_MPIAIJ, 2827 NULL, 2828 /*104*/ MatSetValuesRow_MPIAIJ, 2829 MatRealPart_MPIAIJ, 2830 MatImaginaryPart_MPIAIJ, 2831 NULL, 2832 NULL, 2833 /*109*/ NULL, 2834 NULL, 2835 MatGetRowMin_MPIAIJ, 2836 NULL, 2837 MatMissingDiagonal_MPIAIJ, 2838 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2839 NULL, 2840 MatGetGhosts_MPIAIJ, 2841 NULL, 2842 NULL, 2843 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2844 NULL, 2845 NULL, 2846 NULL, 2847 MatGetMultiProcBlock_MPIAIJ, 2848 /*124*/ MatFindNonzeroRows_MPIAIJ, 2849 MatGetColumnReductions_MPIAIJ, 2850 MatInvertBlockDiagonal_MPIAIJ, 2851 MatInvertVariableBlockDiagonal_MPIAIJ, 2852 MatCreateSubMatricesMPI_MPIAIJ, 2853 /*129*/ NULL, 2854 NULL, 2855 NULL, 2856 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2857 NULL, 2858 /*134*/ NULL, 2859 NULL, 2860 NULL, 2861 NULL, 2862 NULL, 2863 /*139*/ MatSetBlockSizes_MPIAIJ, 2864 NULL, 2865 NULL, 2866 MatFDColoringSetUp_MPIXAIJ, 2867 MatFindOffBlockDiagonalEntries_MPIAIJ, 2868 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2869 /*145*/ NULL, 2870 NULL, 2871 NULL, 2872 MatCreateGraph_Simple_AIJ, 2873 NULL, 2874 /*150*/ NULL, 2875 MatEliminateZeros_MPIAIJ, 2876 MatGetRowSumAbs_MPIAIJ, 2877 NULL, 2878 NULL, 2879 /*155*/ NULL, 2880 MatCopyHashToXAIJ_MPI_Hash}; 2881 2882 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2883 { 2884 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2885 2886 PetscFunctionBegin; 2887 PetscCall(MatStoreValues(aij->A)); 2888 PetscCall(MatStoreValues(aij->B)); 2889 PetscFunctionReturn(PETSC_SUCCESS); 2890 } 2891 2892 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2893 { 2894 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2895 2896 PetscFunctionBegin; 2897 PetscCall(MatRetrieveValues(aij->A)); 2898 PetscCall(MatRetrieveValues(aij->B)); 2899 PetscFunctionReturn(PETSC_SUCCESS); 2900 } 2901 2902 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2903 { 2904 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2905 PetscMPIInt size; 2906 2907 PetscFunctionBegin; 2908 if (B->hash_active) { 2909 B->ops[0] = b->cops; 2910 B->hash_active = PETSC_FALSE; 2911 } 2912 PetscCall(PetscLayoutSetUp(B->rmap)); 2913 PetscCall(PetscLayoutSetUp(B->cmap)); 2914 2915 #if defined(PETSC_USE_CTABLE) 2916 PetscCall(PetscHMapIDestroy(&b->colmap)); 2917 #else 2918 PetscCall(PetscFree(b->colmap)); 2919 #endif 2920 PetscCall(PetscFree(b->garray)); 2921 PetscCall(VecDestroy(&b->lvec)); 2922 PetscCall(VecScatterDestroy(&b->Mvctx)); 2923 2924 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2925 2926 MatSeqXAIJGetOptions_Private(b->B); 2927 PetscCall(MatDestroy(&b->B)); 2928 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2929 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2930 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2931 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2932 MatSeqXAIJRestoreOptions_Private(b->B); 2933 2934 MatSeqXAIJGetOptions_Private(b->A); 2935 PetscCall(MatDestroy(&b->A)); 2936 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2937 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2938 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2939 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2940 MatSeqXAIJRestoreOptions_Private(b->A); 2941 2942 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2943 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2944 B->preallocated = PETSC_TRUE; 2945 B->was_assembled = PETSC_FALSE; 2946 B->assembled = PETSC_FALSE; 2947 PetscFunctionReturn(PETSC_SUCCESS); 2948 } 2949 2950 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2951 { 2952 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2953 2954 PetscFunctionBegin; 2955 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2956 PetscCall(PetscLayoutSetUp(B->rmap)); 2957 PetscCall(PetscLayoutSetUp(B->cmap)); 2958 if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2959 else { 2960 #if defined(PETSC_USE_CTABLE) 2961 PetscCall(PetscHMapIDestroy(&b->colmap)); 2962 #else 2963 PetscCall(PetscFree(b->colmap)); 2964 #endif 2965 PetscCall(PetscFree(b->garray)); 2966 PetscCall(VecDestroy(&b->lvec)); 2967 } 2968 PetscCall(VecScatterDestroy(&b->Mvctx)); 2969 2970 PetscCall(MatResetPreallocation(b->A)); 2971 PetscCall(MatResetPreallocation(b->B)); 2972 B->preallocated = PETSC_TRUE; 2973 B->was_assembled = PETSC_FALSE; 2974 B->assembled = PETSC_FALSE; 2975 PetscFunctionReturn(PETSC_SUCCESS); 2976 } 2977 2978 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2979 { 2980 Mat mat; 2981 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2982 2983 PetscFunctionBegin; 2984 *newmat = NULL; 2985 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2986 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2987 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2988 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2989 a = (Mat_MPIAIJ *)mat->data; 2990 2991 mat->factortype = matin->factortype; 2992 mat->assembled = matin->assembled; 2993 mat->insertmode = NOT_SET_VALUES; 2994 2995 a->size = oldmat->size; 2996 a->rank = oldmat->rank; 2997 a->donotstash = oldmat->donotstash; 2998 a->roworiented = oldmat->roworiented; 2999 a->rowindices = NULL; 3000 a->rowvalues = NULL; 3001 a->getrowactive = PETSC_FALSE; 3002 3003 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3004 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3005 if (matin->hash_active) { 3006 PetscCall(MatSetUp(mat)); 3007 } else { 3008 mat->preallocated = matin->preallocated; 3009 if (oldmat->colmap) { 3010 #if defined(PETSC_USE_CTABLE) 3011 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3012 #else 3013 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3014 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3015 #endif 3016 } else a->colmap = NULL; 3017 if (oldmat->garray) { 3018 PetscInt len; 3019 len = oldmat->B->cmap->n; 3020 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3021 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3022 } else a->garray = NULL; 3023 3024 /* It may happen MatDuplicate is called with a non-assembled matrix 3025 In fact, MatDuplicate only requires the matrix to 
be preallocated 3026 This may happen inside a DMCreateMatrix_Shell */ 3027 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3028 if (oldmat->Mvctx) { 3029 a->Mvctx = oldmat->Mvctx; 3030 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3031 } 3032 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3033 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3034 } 3035 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3036 *newmat = mat; 3037 PetscFunctionReturn(PETSC_SUCCESS); 3038 } 3039 3040 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3041 { 3042 PetscBool isbinary, ishdf5; 3043 3044 PetscFunctionBegin; 3045 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3046 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3047 /* force binary viewer to load .info file if it has not yet done so */ 3048 PetscCall(PetscViewerSetUp(viewer)); 3049 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3050 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3051 if (isbinary) { 3052 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3053 } else if (ishdf5) { 3054 #if defined(PETSC_HAVE_HDF5) 3055 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3056 #else 3057 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3058 #endif 3059 } else { 3060 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3061 } 3062 PetscFunctionReturn(PETSC_SUCCESS); 3063 } 3064 3065 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3066 { 3067 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3068 PetscInt *rowidxs, *colidxs; 3069 PetscScalar *matvals; 3070 3071 PetscFunctionBegin; 3072 PetscCall(PetscViewerSetUp(viewer)); 3073 3074 /* read in matrix header */ 3075 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3076 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3077 M = header[1]; 3078 N = header[2]; 3079 nz = header[3]; 3080 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3081 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3082 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3083 3084 /* set block sizes from the viewer's .info file */ 3085 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3086 /* set global sizes if not set already */ 3087 if (mat->rmap->N < 0) mat->rmap->N = M; 3088 if (mat->cmap->N < 0) mat->cmap->N = N; 3089 PetscCall(PetscLayoutSetUp(mat->rmap)); 3090 PetscCall(PetscLayoutSetUp(mat->cmap)); 3091 3092 /* check if the matrix sizes are correct */ 3093 PetscCall(MatGetSize(mat, &rows, &cols)); 3094 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3095 3096 /* read in row lengths and build row indices */ 3097 PetscCall(MatGetLocalSize(mat, 
&m, NULL)); 3098 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3099 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3100 rowidxs[0] = 0; 3101 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3102 if (nz != PETSC_INT_MAX) { 3103 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3104 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3105 } 3106 3107 /* read in column indices and matrix values */ 3108 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3109 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3110 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3111 /* store matrix indices and values */ 3112 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3113 PetscCall(PetscFree(rowidxs)); 3114 PetscCall(PetscFree2(colidxs, matvals)); 3115 PetscFunctionReturn(PETSC_SUCCESS); 3116 } 3117 3118 /* Not scalable because of ISAllGather() unless getting all columns. */ 3119 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3120 { 3121 IS iscol_local; 3122 PetscBool isstride; 3123 PetscMPIInt gisstride = 0; 3124 3125 PetscFunctionBegin; 3126 /* check if we are grabbing all columns*/ 3127 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3128 3129 if (isstride) { 3130 PetscInt start, len, mstart, mlen; 3131 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3132 PetscCall(ISGetLocalSize(iscol, &len)); 3133 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3134 if (mstart == start && mlen - mstart == len) gisstride = 1; 3135 } 3136 3137 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3138 if (gisstride) { 3139 PetscInt N; 3140 PetscCall(MatGetSize(mat, NULL, &N)); 3141 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3142 PetscCall(ISSetIdentity(iscol_local)); 3143 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3144 } else { 3145 PetscInt cbs; 3146 PetscCall(ISGetBlockSize(iscol, &cbs)); 3147 PetscCall(ISAllGather(iscol, &iscol_local)); 3148 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3149 } 3150 3151 *isseq = iscol_local; 3152 PetscFunctionReturn(PETSC_SUCCESS); 3153 } 3154 3155 /* 3156 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3157 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3158 3159 Input Parameters: 3160 + mat - matrix 3161 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3162 i.e., mat->rstart <= isrow[i] < mat->rend 3163 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3164 i.e., mat->cstart <= iscol[i] < mat->cend 3165 3166 Output Parameters: 3167 + isrow_d - sequential row index set for retrieving mat->A 3168 . iscol_d - sequential column index set for retrieving mat->A 3169 . 
iscol_o - sequential column index set for retrieving mat->B 3170 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3171 */ 3172 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3173 { 3174 Vec x, cmap; 3175 const PetscInt *is_idx; 3176 PetscScalar *xarray, *cmaparray; 3177 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3178 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3179 Mat B = a->B; 3180 Vec lvec = a->lvec, lcmap; 3181 PetscInt i, cstart, cend, Bn = B->cmap->N; 3182 MPI_Comm comm; 3183 VecScatter Mvctx = a->Mvctx; 3184 3185 PetscFunctionBegin; 3186 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3187 PetscCall(ISGetLocalSize(iscol, &ncols)); 3188 3189 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3190 PetscCall(MatCreateVecs(mat, &x, NULL)); 3191 PetscCall(VecSet(x, -1.0)); 3192 PetscCall(VecDuplicate(x, &cmap)); 3193 PetscCall(VecSet(cmap, -1.0)); 3194 3195 /* Get start indices */ 3196 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3197 isstart -= ncols; 3198 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3199 3200 PetscCall(ISGetIndices(iscol, &is_idx)); 3201 PetscCall(VecGetArray(x, &xarray)); 3202 PetscCall(VecGetArray(cmap, &cmaparray)); 3203 PetscCall(PetscMalloc1(ncols, &idx)); 3204 for (i = 0; i < ncols; i++) { 3205 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3206 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3207 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3208 } 3209 PetscCall(VecRestoreArray(x, &xarray)); 3210 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3211 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3212 3213 /* Get iscol_d */ 3214 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3215 PetscCall(ISGetBlockSize(iscol, &i)); 3216 PetscCall(ISSetBlockSize(*iscol_d, i)); 3217 3218 /* Get isrow_d */ 3219 PetscCall(ISGetLocalSize(isrow, &m)); 3220 rstart = mat->rmap->rstart; 3221 PetscCall(PetscMalloc1(m, &idx)); 3222 PetscCall(ISGetIndices(isrow, &is_idx)); 3223 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3224 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3225 3226 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3227 PetscCall(ISGetBlockSize(isrow, &i)); 3228 PetscCall(ISSetBlockSize(*isrow_d, i)); 3229 3230 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3231 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3232 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3233 3234 PetscCall(VecDuplicate(lvec, &lcmap)); 3235 3236 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3237 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3238 3239 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3240 /* off-process column indices */ 3241 count = 0; 3242 PetscCall(PetscMalloc1(Bn, &idx)); 3243 PetscCall(PetscMalloc1(Bn, &cmap1)); 3244 3245 PetscCall(VecGetArray(lvec, &xarray)); 3246 PetscCall(VecGetArray(lcmap, &cmaparray)); 3247 for (i = 0; i < Bn; i++) { 3248 if (PetscRealPart(xarray[i]) > -1.0) { 3249 idx[count] = i; /* local column index in off-diagonal part B */ 3250 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3251 count++; 
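      /* only scattered entries greater than -1 are kept: they mark columns of the off-diagonal block B whose global column (garray[i]) was selected by iscol on some process; entries left at -1 were never selected and are skipped */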
3252 } 3253 } 3254 PetscCall(VecRestoreArray(lvec, &xarray)); 3255 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3256 3257 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3258 /* cannot ensure iscol_o has same blocksize as iscol! */ 3259 3260 PetscCall(PetscFree(idx)); 3261 *garray = cmap1; 3262 3263 PetscCall(VecDestroy(&x)); 3264 PetscCall(VecDestroy(&cmap)); 3265 PetscCall(VecDestroy(&lcmap)); 3266 PetscFunctionReturn(PETSC_SUCCESS); 3267 } 3268 3269 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3270 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3271 { 3272 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3273 Mat M = NULL; 3274 MPI_Comm comm; 3275 IS iscol_d, isrow_d, iscol_o; 3276 Mat Asub = NULL, Bsub = NULL; 3277 PetscInt n; 3278 3279 PetscFunctionBegin; 3280 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3281 3282 if (call == MAT_REUSE_MATRIX) { 3283 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3284 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3285 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3286 3287 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3288 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3289 3290 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3291 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3292 3293 /* Update diagonal and off-diagonal portions of submat */ 3294 asub = (Mat_MPIAIJ *)(*submat)->data; 3295 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3296 PetscCall(ISGetLocalSize(iscol_o, &n)); 3297 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3298 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3299 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3300 3301 } else { /* call == MAT_INITIAL_MATRIX) */ 3302 PetscInt *garray; 3303 PetscInt BsubN; 3304 3305 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
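       isrow_d and iscol_d address the diagonal block a->A in local numbering; iscol_o selects columns of the off-diagonal block a->B, and garray records the global position of each iscol_o entry within iscol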
*/ 3306 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3307 3308 /* Create local submatrices Asub and Bsub */ 3309 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3310 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3311 3312 /* Create submatrix M */ 3313 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3314 3315 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3316 asub = (Mat_MPIAIJ *)M->data; 3317 3318 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3319 n = asub->B->cmap->N; 3320 if (BsubN > n) { 3321 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3322 const PetscInt *idx; 3323 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3324 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3325 3326 PetscCall(PetscMalloc1(n, &idx_new)); 3327 j = 0; 3328 PetscCall(ISGetIndices(iscol_o, &idx)); 3329 for (i = 0; i < n; i++) { 3330 if (j >= BsubN) break; 3331 while (subgarray[i] > garray[j]) j++; 3332 3333 if (subgarray[i] == garray[j]) { 3334 idx_new[i] = idx[j++]; 3335 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3336 } 3337 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3338 3339 PetscCall(ISDestroy(&iscol_o)); 3340 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3341 3342 } else if (BsubN < n) { 3343 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3344 } 3345 3346 PetscCall(PetscFree(garray)); 3347 *submat = M; 3348 3349 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3350 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3351 PetscCall(ISDestroy(&isrow_d)); 3352 3353 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3354 PetscCall(ISDestroy(&iscol_d)); 3355 3356 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3357 PetscCall(ISDestroy(&iscol_o)); 3358 } 3359 PetscFunctionReturn(PETSC_SUCCESS); 3360 } 3361 3362 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3363 { 3364 IS iscol_local = NULL, isrow_d; 3365 PetscInt csize; 3366 PetscInt n, i, j, start, end; 3367 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3368 MPI_Comm comm; 3369 3370 PetscFunctionBegin; 3371 /* If isrow has same processor distribution as mat, 3372 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3373 if (call == MAT_REUSE_MATRIX) { 3374 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3375 if (isrow_d) { 3376 sameRowDist = PETSC_TRUE; 3377 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3378 } else { 3379 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3380 if (iscol_local) { 3381 sameRowDist = PETSC_TRUE; 3382 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3383 } 3384 } 3385 } else { 3386 /* Check if isrow has same processor distribution as mat */ 3387 sameDist[0] = PETSC_FALSE; 3388 
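    /* isrow matches mat's row distribution when every local index of isrow lies inside mat's local ownership range [start, end); an empty local isrow trivially matches */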
PetscCall(ISGetLocalSize(isrow, &n)); 3389 if (!n) { 3390 sameDist[0] = PETSC_TRUE; 3391 } else { 3392 PetscCall(ISGetMinMax(isrow, &i, &j)); 3393 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3394 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3395 } 3396 3397 /* Check if iscol has same processor distribution as mat */ 3398 sameDist[1] = PETSC_FALSE; 3399 PetscCall(ISGetLocalSize(iscol, &n)); 3400 if (!n) { 3401 sameDist[1] = PETSC_TRUE; 3402 } else { 3403 PetscCall(ISGetMinMax(iscol, &i, &j)); 3404 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3405 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3406 } 3407 3408 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3409 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3410 sameRowDist = tsameDist[0]; 3411 } 3412 3413 if (sameRowDist) { 3414 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3415 /* isrow and iscol have same processor distribution as mat */ 3416 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3417 PetscFunctionReturn(PETSC_SUCCESS); 3418 } else { /* sameRowDist */ 3419 /* isrow has same processor distribution as mat */ 3420 if (call == MAT_INITIAL_MATRIX) { 3421 PetscBool sorted; 3422 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3423 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3424 PetscCall(ISGetSize(iscol, &i)); 3425 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3426 3427 PetscCall(ISSorted(iscol_local, &sorted)); 3428 if (sorted) { 3429 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3430 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3431 PetscFunctionReturn(PETSC_SUCCESS); 3432 } 3433 } else { /* call == MAT_REUSE_MATRIX */ 3434 IS iscol_sub; 3435 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3436 if (iscol_sub) { 3437 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3438 PetscFunctionReturn(PETSC_SUCCESS); 3439 } 3440 } 3441 } 3442 } 3443 3444 /* General case: iscol -> iscol_local which has global size of iscol */ 3445 if (call == MAT_REUSE_MATRIX) { 3446 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3447 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3448 } else { 3449 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3450 } 3451 3452 PetscCall(ISGetLocalSize(iscol, &csize)); 3453 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3454 3455 if (call == MAT_INITIAL_MATRIX) { 3456 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3457 PetscCall(ISDestroy(&iscol_local)); 3458 } 3459 PetscFunctionReturn(PETSC_SUCCESS); 3460 } 3461 3462 /*@C 3463 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3464 and "off-diagonal" part of the matrix in CSR format. 3465 3466 Collective 3467 3468 Input Parameters: 3469 + comm - MPI communicator 3470 . A - "diagonal" portion of matrix 3471 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3472 - garray - global index of `B` columns 3473 3474 Output Parameter: 3475 . mat - the matrix, with input `A` as its local diagonal matrix 3476 3477 Level: advanced 3478 3479 Notes: 3480 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3481 3482 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3483 3484 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3485 @*/ 3486 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3487 { 3488 Mat_MPIAIJ *maij; 3489 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3490 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3491 const PetscScalar *oa; 3492 Mat Bnew; 3493 PetscInt m, n, N; 3494 MatType mpi_mat_type; 3495 3496 PetscFunctionBegin; 3497 PetscCall(MatCreate(comm, mat)); 3498 PetscCall(MatGetSize(A, &m, &n)); 3499 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3500 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3501 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3502 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3503 3504 /* Get global columns of mat */ 3505 PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3506 3507 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3508 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3509 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3510 PetscCall(MatSetType(*mat, mpi_mat_type)); 3511 3512 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3513 maij = (Mat_MPIAIJ *)(*mat)->data; 3514 3515 (*mat)->preallocated = PETSC_TRUE; 3516 3517 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3518 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3519 3520 /* Set A as diagonal portion of *mat */ 3521 maij->A = A; 3522 3523 nz = oi[m]; 3524 for (i = 0; i < nz; i++) { 3525 col = oj[i]; 3526 oj[i] = garray[col]; 3527 } 3528 3529 /* Set Bnew as off-diagonal portion of *mat */ 3530 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3531 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3532 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3533 bnew = (Mat_SeqAIJ *)Bnew->data; 3534 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3535 maij->B = Bnew; 3536 3537 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3538 3539 b->free_a = PETSC_FALSE; 3540 b->free_ij = PETSC_FALSE; 3541 PetscCall(MatDestroy(&B)); 3542 3543 bnew->free_a = PETSC_TRUE; 3544 bnew->free_ij = PETSC_TRUE; 3545 3546 /* condense columns of maij->B */ 3547 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3548 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3549 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3550 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3551 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3552 PetscFunctionReturn(PETSC_SUCCESS); 3553 } 3554 3555 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3556 3557 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3558 { 3559 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3560 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3561 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3562 Mat M, Msub, B = a->B; 3563 MatScalar *aa; 3564 Mat_SeqAIJ *aij; 3565 PetscInt *garray = a->garray, *colsub, Ncols; 3566 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3567 IS iscol_sub, iscmap; 3568 const PetscInt *is_idx, *cmap; 3569 PetscBool allcolumns = PETSC_FALSE; 3570 MPI_Comm comm; 3571 3572 PetscFunctionBegin; 3573 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3574 if (call == MAT_REUSE_MATRIX) { 3575 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3576 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3577 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3578 3579 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3580 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3581 3582 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3583 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3584 3585 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3586 3587 } else { /* call == MAT_INITIAL_MATRIX) */ 3588 PetscBool 
flg; 3589 3590 PetscCall(ISGetLocalSize(iscol, &n)); 3591 PetscCall(ISGetSize(iscol, &Ncols)); 3592 3593 /* (1) iscol -> nonscalable iscol_local */ 3594 /* Check for special case: each processor gets entire matrix columns */ 3595 PetscCall(ISIdentity(iscol_local, &flg)); 3596 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3597 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3598 if (allcolumns) { 3599 iscol_sub = iscol_local; 3600 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3601 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3602 3603 } else { 3604 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3605 PetscInt *idx, *cmap1, k; 3606 PetscCall(PetscMalloc1(Ncols, &idx)); 3607 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3608 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3609 count = 0; 3610 k = 0; 3611 for (i = 0; i < Ncols; i++) { 3612 j = is_idx[i]; 3613 if (j >= cstart && j < cend) { 3614 /* diagonal part of mat */ 3615 idx[count] = j; 3616 cmap1[count++] = i; /* column index in submat */ 3617 } else if (Bn) { 3618 /* off-diagonal part of mat */ 3619 if (j == garray[k]) { 3620 idx[count] = j; 3621 cmap1[count++] = i; /* column index in submat */ 3622 } else if (j > garray[k]) { 3623 while (j > garray[k] && k < Bn - 1) k++; 3624 if (j == garray[k]) { 3625 idx[count] = j; 3626 cmap1[count++] = i; /* column index in submat */ 3627 } 3628 } 3629 } 3630 } 3631 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3632 3633 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3634 PetscCall(ISGetBlockSize(iscol, &cbs)); 3635 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3636 3637 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3638 } 3639 3640 /* (3) Create sequential Msub */ 3641 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3642 } 3643 3644 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3645 aij = (Mat_SeqAIJ *)Msub->data; 3646 ii = aij->i; 3647 PetscCall(ISGetIndices(iscmap, &cmap)); 3648 3649 /* 3650 m - number of local rows 3651 Ncols - number of columns (same on all processors) 3652 rstart - first row in new global matrix generated 3653 */ 3654 PetscCall(MatGetSize(Msub, &m, NULL)); 3655 3656 if (call == MAT_INITIAL_MATRIX) { 3657 /* (4) Create parallel newmat */ 3658 PetscMPIInt rank, size; 3659 PetscInt csize; 3660 3661 PetscCallMPI(MPI_Comm_size(comm, &size)); 3662 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3663 3664 /* 3665 Determine the number of non-zeros in the diagonal and off-diagonal 3666 portions of the matrix in order to do correct preallocation 3667 */ 3668 3669 /* first get start and end of "diagonal" columns */ 3670 PetscCall(ISGetLocalSize(iscol, &csize)); 3671 if (csize == PETSC_DECIDE) { 3672 PetscCall(ISGetSize(isrow, &mglobal)); 3673 if (mglobal == Ncols) { /* square matrix */ 3674 nlocal = m; 3675 } else { 3676 nlocal = Ncols / size + ((Ncols % size) > rank); 3677 } 3678 } else { 3679 nlocal = csize; 3680 } 3681 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3682 rstart = rend - nlocal; 3683 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3684 3685 /* next, 
compute all the lengths */ 3686 jj = aij->j; 3687 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3688 olens = dlens + m; 3689 for (i = 0; i < m; i++) { 3690 jend = ii[i + 1] - ii[i]; 3691 olen = 0; 3692 dlen = 0; 3693 for (j = 0; j < jend; j++) { 3694 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3695 else dlen++; 3696 jj++; 3697 } 3698 olens[i] = olen; 3699 dlens[i] = dlen; 3700 } 3701 3702 PetscCall(ISGetBlockSize(isrow, &bs)); 3703 PetscCall(ISGetBlockSize(iscol, &cbs)); 3704 3705 PetscCall(MatCreate(comm, &M)); 3706 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3707 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3708 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3709 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3710 PetscCall(PetscFree(dlens)); 3711 3712 } else { /* call == MAT_REUSE_MATRIX */ 3713 M = *newmat; 3714 PetscCall(MatGetLocalSize(M, &i, NULL)); 3715 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3716 PetscCall(MatZeroEntries(M)); 3717 /* 3718 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3719 rather than the slower MatSetValues(). 3720 */ 3721 M->was_assembled = PETSC_TRUE; 3722 M->assembled = PETSC_FALSE; 3723 } 3724 3725 /* (5) Set values of Msub to *newmat */ 3726 PetscCall(PetscMalloc1(count, &colsub)); 3727 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3728 3729 jj = aij->j; 3730 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3731 for (i = 0; i < m; i++) { 3732 row = rstart + i; 3733 nz = ii[i + 1] - ii[i]; 3734 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3735 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3736 jj += nz; 3737 aa += nz; 3738 } 3739 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3740 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3741 3742 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3743 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3744 3745 PetscCall(PetscFree(colsub)); 3746 3747 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3748 if (call == MAT_INITIAL_MATRIX) { 3749 *newmat = M; 3750 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3751 PetscCall(MatDestroy(&Msub)); 3752 3753 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3754 PetscCall(ISDestroy(&iscol_sub)); 3755 3756 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3757 PetscCall(ISDestroy(&iscmap)); 3758 3759 if (iscol_local) { 3760 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3761 PetscCall(ISDestroy(&iscol_local)); 3762 } 3763 } 3764 PetscFunctionReturn(PETSC_SUCCESS); 3765 } 3766 3767 /* 3768 Not great since it makes two copies of the submatrix, first an SeqAIJ 3769 in local and then by concatenating the local matrices the end result. 3770 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3771 3772 This requires a sequential iscol with all indices. 
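   (every process must hold the full list of requested global columns in iscol, so per-process memory grows with the global number of selected columns, hence the "nonscalable" in the name)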
3773 */ 3774 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3775 { 3776 PetscMPIInt rank, size; 3777 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3778 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3779 Mat M, Mreuse; 3780 MatScalar *aa, *vwork; 3781 MPI_Comm comm; 3782 Mat_SeqAIJ *aij; 3783 PetscBool colflag, allcolumns = PETSC_FALSE; 3784 3785 PetscFunctionBegin; 3786 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3787 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3788 PetscCallMPI(MPI_Comm_size(comm, &size)); 3789 3790 /* Check for special case: each processor gets entire matrix columns */ 3791 PetscCall(ISIdentity(iscol, &colflag)); 3792 PetscCall(ISGetLocalSize(iscol, &n)); 3793 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3794 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3795 3796 if (call == MAT_REUSE_MATRIX) { 3797 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3798 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3799 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3800 } else { 3801 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3802 } 3803 3804 /* 3805 m - number of local rows 3806 n - number of columns (same on all processors) 3807 rstart - first row in new global matrix generated 3808 */ 3809 PetscCall(MatGetSize(Mreuse, &m, &n)); 3810 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3811 if (call == MAT_INITIAL_MATRIX) { 3812 aij = (Mat_SeqAIJ *)Mreuse->data; 3813 ii = aij->i; 3814 jj = aij->j; 3815 3816 /* 3817 Determine the number of non-zeros in the diagonal and off-diagonal 3818 portions of the matrix in order to do correct preallocation 3819 */ 3820 3821 /* first get start and end of "diagonal" columns */ 3822 if (csize == PETSC_DECIDE) { 3823 PetscCall(ISGetSize(isrow, &mglobal)); 3824 if (mglobal == n) { /* square matrix */ 3825 nlocal = m; 3826 } else { 3827 nlocal = n / size + ((n % size) > rank); 3828 } 3829 } else { 3830 nlocal = csize; 3831 } 3832 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3833 rstart = rend - nlocal; 3834 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3835 3836 /* next, compute all the lengths */ 3837 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3838 olens = dlens + m; 3839 for (i = 0; i < m; i++) { 3840 jend = ii[i + 1] - ii[i]; 3841 olen = 0; 3842 dlen = 0; 3843 for (j = 0; j < jend; j++) { 3844 if (*jj < rstart || *jj >= rend) olen++; 3845 else dlen++; 3846 jj++; 3847 } 3848 olens[i] = olen; 3849 dlens[i] = dlen; 3850 } 3851 PetscCall(MatCreate(comm, &M)); 3852 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3853 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3854 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3855 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3856 PetscCall(PetscFree(dlens)); 3857 } else { 3858 PetscInt ml, nl; 3859 3860 M = *newmat; 3861 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3862 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3863 PetscCall(MatZeroEntries(M)); 3864 /* 3865 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3866 rather than the slower MatSetValues(). 3867 */ 3868 M->was_assembled = PETSC_TRUE; 3869 M->assembled = PETSC_FALSE; 3870 } 3871 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3872 aij = (Mat_SeqAIJ *)Mreuse->data; 3873 ii = aij->i; 3874 jj = aij->j; 3875 3876 /* trigger copy to CPU if needed */ 3877 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3878 for (i = 0; i < m; i++) { 3879 row = rstart + i; 3880 nz = ii[i + 1] - ii[i]; 3881 cwork = jj; 3882 jj = PetscSafePointerPlusOffset(jj, nz); 3883 vwork = aa; 3884 aa = PetscSafePointerPlusOffset(aa, nz); 3885 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3886 } 3887 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3888 3889 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3890 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3891 *newmat = M; 3892 3893 /* save submatrix used in processor for next request */ 3894 if (call == MAT_INITIAL_MATRIX) { 3895 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3896 PetscCall(MatDestroy(&Mreuse)); 3897 } 3898 PetscFunctionReturn(PETSC_SUCCESS); 3899 } 3900 3901 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3902 { 3903 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3904 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3905 const PetscInt *JJ; 3906 PetscBool nooffprocentries; 3907 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3908 3909 PetscFunctionBegin; 3910 PetscCall(PetscLayoutSetUp(B->rmap)); 3911 PetscCall(PetscLayoutSetUp(B->cmap)); 3912 m = B->rmap->n; 3913 cstart = B->cmap->rstart; 3914 cend = B->cmap->rend; 3915 rstart = B->rmap->rstart; 3916 irstart = Ii[0]; 3917 3918 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3919 3920 if (PetscDefined(USE_DEBUG)) { 3921 for (i = 0; i < m; i++) { 3922 nnz = Ii[i + 1] - Ii[i]; 3923 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3924 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3925 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3926 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3927 } 3928 } 3929 3930 for (i = 0; i < m; i++) { 3931 nnz = Ii[i + 1] - Ii[i]; 3932 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3933 nnz_max = PetscMax(nnz_max, nnz); 3934 d = 0; 3935 for (j = 0; j < nnz; j++) { 3936 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3937 } 3938 d_nnz[i] = d; 3939 o_nnz[i] = nnz - d; 3940 } 3941 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3942 PetscCall(PetscFree2(d_nnz, o_nnz)); 3943 3944 for (i = 0; i < m; i++) { 3945 ii = i + rstart; 3946 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3947 } 3948 nooffprocentries = B->nooffprocentries; 3949 B->nooffprocentries = PETSC_TRUE; 3950 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3951 
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3952 B->nooffprocentries = nooffprocentries; 3953 3954 /* count number of entries below block diagonal */ 3955 PetscCall(PetscFree(Aij->ld)); 3956 PetscCall(PetscCalloc1(m, &ld)); 3957 Aij->ld = ld; 3958 for (i = 0; i < m; i++) { 3959 nnz = Ii[i + 1] - Ii[i]; 3960 j = 0; 3961 while (j < nnz && J[j] < cstart) j++; 3962 ld[i] = j; 3963 if (J) J += nnz; 3964 } 3965 3966 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3967 PetscFunctionReturn(PETSC_SUCCESS); 3968 } 3969 3970 /*@ 3971 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3972 (the default parallel PETSc format). 3973 3974 Collective 3975 3976 Input Parameters: 3977 + B - the matrix 3978 . i - the indices into `j` for the start of each local row (indices start with zero) 3979 . j - the column indices for each local row (indices start with zero) 3980 - v - optional values in the matrix 3981 3982 Level: developer 3983 3984 Notes: 3985 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3986 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3987 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3988 3989 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3990 3991 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3992 3993 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3994 3995 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3996 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3997 3998 The format which is used for the sparse matrix input, is equivalent to a 3999 row-major ordering.. i.e for the following matrix, the input data expected is 4000 as shown 4001 .vb 4002 1 0 0 4003 2 0 3 P0 4004 ------- 4005 4 5 6 P1 4006 4007 Process0 [P0] rows_owned=[0,1] 4008 i = {0,1,3} [size = nrow+1 = 2+1] 4009 j = {0,0,2} [size = 3] 4010 v = {1,2,3} [size = 3] 4011 4012 Process1 [P1] rows_owned=[2] 4013 i = {0,3} [size = nrow+1 = 1+1] 4014 j = {0,1,2} [size = 3] 4015 v = {4,5,6} [size = 3] 4016 .ve 4017 4018 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4019 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4020 @*/ 4021 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4022 { 4023 PetscFunctionBegin; 4024 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4025 PetscFunctionReturn(PETSC_SUCCESS); 4026 } 4027 4028 /*@ 4029 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4030 (the default parallel PETSc format). For good matrix assembly performance 4031 the user should preallocate the matrix storage by setting the parameters 4032 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4033 4034 Collective 4035 4036 Input Parameters: 4037 + B - the matrix 4038 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4039 (same value is used for all local rows) 4040 . d_nnz - array containing the number of nonzeros in the various rows of the 4041 DIAGONAL portion of the local submatrix (possibly different for each row) 4042 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4043 The size of this array is equal to the number of local rows, i.e 'm'. 4044 For matrices that will be factored, you must leave room for (and set) 4045 the diagonal entry even if it is zero. 4046 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4047 submatrix (same value is used for all local rows). 4048 - o_nnz - array containing the number of nonzeros in the various rows of the 4049 OFF-DIAGONAL portion of the local submatrix (possibly different for 4050 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4051 structure. The size of this array is equal to the number 4052 of local rows, i.e 'm'. 4053 4054 Example Usage: 4055 Consider the following 8x8 matrix with 34 non-zero values, that is 4056 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4057 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4058 as follows 4059 4060 .vb 4061 1 2 0 | 0 3 0 | 0 4 4062 Proc0 0 5 6 | 7 0 0 | 8 0 4063 9 0 10 | 11 0 0 | 12 0 4064 ------------------------------------- 4065 13 0 14 | 15 16 17 | 0 0 4066 Proc1 0 18 0 | 19 20 21 | 0 0 4067 0 0 0 | 22 23 0 | 24 0 4068 ------------------------------------- 4069 Proc2 25 26 27 | 0 0 28 | 29 0 4070 30 0 0 | 31 32 33 | 0 34 4071 .ve 4072 4073 This can be represented as a collection of submatrices as 4074 .vb 4075 A B C 4076 D E F 4077 G H I 4078 .ve 4079 4080 Where the submatrices A,B,C are owned by proc0, D,E,F are 4081 owned by proc1, G,H,I are owned by proc2. 4082 4083 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4084 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4085 The 'M','N' parameters are 8,8, and have the same values on all procs. 4086 4087 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4088 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4089 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4090 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4091 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4092 matrix, and [DF] as another `MATSEQAIJ` matrix. 4093 4094 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4095 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4096 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4097 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4098 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4099 In this case, the values of `d_nz`, `o_nz` are 4100 .vb 4101 proc0 dnz = 2, o_nz = 2 4102 proc1 dnz = 3, o_nz = 2 4103 proc2 dnz = 1, o_nz = 4 4104 .ve 4105 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4106 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4107 for proc3. i.e we are using 12+15+10=37 storage locations to store 4108 34 values. 4109 4110 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4111 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 
4112 In the above case the values for `d_nnz`, `o_nnz` are 4113 .vb 4114 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4115 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4116 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4117 .ve 4118 Here the space allocated is sum of all the above values i.e 34, and 4119 hence pre-allocation is perfect. 4120 4121 Level: intermediate 4122 4123 Notes: 4124 If the *_nnz parameter is given then the *_nz parameter is ignored 4125 4126 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4127 storage. The stored row and column indices begin with zero. 4128 See [Sparse Matrices](sec_matsparse) for details. 4129 4130 The parallel matrix is partitioned such that the first m0 rows belong to 4131 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4132 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4133 4134 The DIAGONAL portion of the local submatrix of a processor can be defined 4135 as the submatrix which is obtained by extraction the part corresponding to 4136 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4137 first row that belongs to the processor, r2 is the last row belonging to 4138 the this processor, and c1-c2 is range of indices of the local part of a 4139 vector suitable for applying the matrix to. This is an mxn matrix. In the 4140 common case of a square matrix, the row and column ranges are the same and 4141 the DIAGONAL part is also square. The remaining portion of the local 4142 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4143 4144 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4145 4146 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4147 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4148 You can also run with the option `-info` and look for messages with the string 4149 malloc in them to see if additional memory allocation was needed. 4150 4151 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4152 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4153 @*/ 4154 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4155 { 4156 PetscFunctionBegin; 4157 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4158 PetscValidType(B, 1); 4159 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4160 PetscFunctionReturn(PETSC_SUCCESS); 4161 } 4162 4163 /*@ 4164 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4165 CSR format for the local rows. 4166 4167 Collective 4168 4169 Input Parameters: 4170 + comm - MPI communicator 4171 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4172 . n - This value should be the same as the local size used in creating the 4173 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4174 calculated if `N` is given) For square matrices n is almost always `m`. 4175 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4176 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4177 . 
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4178 . j - global column indices 4179 - a - optional matrix values 4180 4181 Output Parameter: 4182 . mat - the matrix 4183 4184 Level: intermediate 4185 4186 Notes: 4187 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4188 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4189 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4190 4191 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4192 4193 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4194 4195 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4196 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4197 4198 The format which is used for the sparse matrix input, is equivalent to a 4199 row-major ordering, i.e., for the following matrix, the input data expected is 4200 as shown 4201 .vb 4202 1 0 0 4203 2 0 3 P0 4204 ------- 4205 4 5 6 P1 4206 4207 Process0 [P0] rows_owned=[0,1] 4208 i = {0,1,3} [size = nrow+1 = 2+1] 4209 j = {0,0,2} [size = 3] 4210 v = {1,2,3} [size = 3] 4211 4212 Process1 [P1] rows_owned=[2] 4213 i = {0,3} [size = nrow+1 = 1+1] 4214 j = {0,1,2} [size = 3] 4215 v = {4,5,6} [size = 3] 4216 .ve 4217 4218 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4219 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4220 @*/ 4221 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4222 { 4223 PetscFunctionBegin; 4224 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4225 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4226 PetscCall(MatCreate(comm, mat)); 4227 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4228 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4229 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4230 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4231 PetscFunctionReturn(PETSC_SUCCESS); 4232 } 4233 4234 /*@ 4235 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4236 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4237 from `MatCreateMPIAIJWithArrays()` 4238 4239 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4240 4241 Collective 4242 4243 Input Parameters: 4244 + mat - the matrix 4245 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4246 . n - This value should be the same as the local size used in creating the 4247 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4248 calculated if N is given) For square matrices n is almost always m. 4249 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4250 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4251 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4252 . J - column indices 4253 - v - matrix values 4254 4255 Level: deprecated 4256 4257 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4258 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4259 @*/ 4260 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4261 { 4262 PetscInt nnz, i; 4263 PetscBool nooffprocentries; 4264 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4265 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4266 PetscScalar *ad, *ao; 4267 PetscInt ldi, Iii, md; 4268 const PetscInt *Adi = Ad->i; 4269 PetscInt *ld = Aij->ld; 4270 4271 PetscFunctionBegin; 4272 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4273 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4274 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4275 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4276 4277 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4278 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4279 4280 for (i = 0; i < m; i++) { 4281 if (PetscDefined(USE_DEBUG)) { 4282 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4283 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4284 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4285 } 4286 } 4287 nnz = Ii[i + 1] - Ii[i]; 4288 Iii = Ii[i]; 4289 ldi = ld[i]; 4290 md = Adi[i + 1] - Adi[i]; 4291 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4292 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4293 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4294 ad += md; 4295 ao += nnz - md; 4296 } 4297 nooffprocentries = mat->nooffprocentries; 4298 mat->nooffprocentries = PETSC_TRUE; 4299 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4300 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4301 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4302 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4303 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4304 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4305 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4306 mat->nooffprocentries = nooffprocentries; 4307 PetscFunctionReturn(PETSC_SUCCESS); 4308 } 4309 4310 /*@ 4311 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4312 4313 Collective 4314 4315 Input Parameters: 4316 + mat - the matrix 4317 - v - matrix values, stored by row 4318 4319 Level: intermediate 4320 4321 Notes: 4322 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4323 4324 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4325 4326 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4327 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4328 @*/ 4329 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4330 { 4331 PetscInt nnz, i, m; 4332 PetscBool nooffprocentries; 4333 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4334 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4335 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4336 PetscScalar *ad, *ao; 4337 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4338 PetscInt ldi, Iii, md; 4339 PetscInt *ld = Aij->ld; 4340 4341 PetscFunctionBegin; 4342 m = mat->rmap->n; 4343 4344 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4345 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4346 Iii = 0; 4347 for (i = 0; i < m; i++) { 4348 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4349 ldi = ld[i]; 4350 md = Adi[i + 1] - Adi[i]; 4351 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4352 ad += md; 4353 if (ao) { 4354 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4355 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4356 ao += nnz - md; 4357 } 4358 Iii += nnz; 4359 } 4360 nooffprocentries = mat->nooffprocentries; 4361 mat->nooffprocentries = PETSC_TRUE; 4362 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4363 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4364 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4365 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4366 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4367 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4368 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4369 mat->nooffprocentries = nooffprocentries; 4370 PetscFunctionReturn(PETSC_SUCCESS); 4371 } 4372 4373 /*@ 4374 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4375 (the default parallel PETSc format). For good matrix assembly performance 4376 the user should preallocate the matrix storage by setting the parameters 4377 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4378 4379 Collective 4380 4381 Input Parameters: 4382 + comm - MPI communicator 4383 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4384 This value should be the same as the local size used in creating the 4385 y vector for the matrix-vector product y = Ax. 4386 . n - This value should be the same as the local size used in creating the 4387 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4388 calculated if N is given) For square matrices n is almost always m. 4389 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4390 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4391 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4392 (same value is used for all local rows) 4393 . d_nnz - array containing the number of nonzeros in the various rows of the 4394 DIAGONAL portion of the local submatrix (possibly different for each row) 4395 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4396 The size of this array is equal to the number of local rows, i.e 'm'. 4397 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4398 submatrix (same value is used for all local rows). 4399 - o_nnz - array containing the number of nonzeros in the various rows of the 4400 OFF-DIAGONAL portion of the local submatrix (possibly different for 4401 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4402 structure. The size of this array is equal to the number 4403 of local rows, i.e 'm'. 4404 4405 Output Parameter: 4406 . A - the matrix 4407 4408 Options Database Keys: 4409 + -mat_no_inode - Do not use inodes 4410 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4411 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4412 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4413 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4414 4415 Level: intermediate 4416 4417 Notes: 4418 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4419 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4420 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4421 4422 If the *_nnz parameter is given then the *_nz parameter is ignored 4423 4424 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4425 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4426 storage requirements for this matrix. 4427 4428 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4429 processor than it must be used on all processors that share the object for 4430 that argument. 4431 4432 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4433 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4434 4435 The user MUST specify either the local or global matrix dimensions 4436 (possibly both). 4437 4438 The parallel matrix is partitioned across processors such that the 4439 first `m0` rows belong to process 0, the next `m1` rows belong to 4440 process 1, the next `m2` rows belong to process 2, etc., where 4441 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4442 values corresponding to [m x N] submatrix. 4443 4444 The columns are logically partitioned with the n0 columns belonging 4445 to 0th partition, the next n1 columns belonging to the next 4446 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4447 4448 The DIAGONAL portion of the local submatrix on any given processor 4449 is the submatrix corresponding to the rows and columns m,n 4450 corresponding to the given processor. i.e diagonal matrix on 4451 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4452 etc. The remaining portion of the local submatrix [m x (N-n)] 4453 constitute the OFF-DIAGONAL portion. The example below better 4454 illustrates this concept. The two matrices, the DIAGONAL portion and 4455 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 
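  A direct call has the form sketched below (here `d_nnz` and `o_nnz` are per-row preallocation arrays of length `m` supplied by the caller, as described above):
.vb
  Mat       A;
  PetscInt *d_nnz, *o_nnz; /* of length m, filled by the caller */

  MatCreateAIJ(comm, m, n, PETSC_DETERMINE, PETSC_DETERMINE, 0, d_nnz, 0, o_nnz, &A);
.ve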
4456 4457 For a square global matrix we define each processor's diagonal portion 4458 to be its local rows and the corresponding columns (a square submatrix); 4459 each processor's off-diagonal portion encompasses the remainder of the 4460 local matrix (a rectangular submatrix). 4461 4462 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4463 4464 When calling this routine with a single process communicator, a matrix of 4465 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4466 type of communicator, use the construction mechanism 4467 .vb 4468 MatCreate(..., &A); 4469 MatSetType(A, MATMPIAIJ); 4470 MatSetSizes(A, m, n, M, N); 4471 MatMPIAIJSetPreallocation(A, ...); 4472 .ve 4473 4474 By default, this format uses inodes (identical nodes) when possible. 4475 We search for consecutive rows with the same nonzero structure, thereby 4476 reusing matrix information to achieve increased efficiency. 4477 4478 Example Usage: 4479 Consider the following 8x8 matrix with 34 non-zero values, that is 4480 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4481 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4482 as follows 4483 4484 .vb 4485 1 2 0 | 0 3 0 | 0 4 4486 Proc0 0 5 6 | 7 0 0 | 8 0 4487 9 0 10 | 11 0 0 | 12 0 4488 ------------------------------------- 4489 13 0 14 | 15 16 17 | 0 0 4490 Proc1 0 18 0 | 19 20 21 | 0 0 4491 0 0 0 | 22 23 0 | 24 0 4492 ------------------------------------- 4493 Proc2 25 26 27 | 0 0 28 | 29 0 4494 30 0 0 | 31 32 33 | 0 34 4495 .ve 4496 4497 This can be represented as a collection of submatrices as 4498 4499 .vb 4500 A B C 4501 D E F 4502 G H I 4503 .ve 4504 4505 Where the submatrices A,B,C are owned by proc0, D,E,F are 4506 owned by proc1, G,H,I are owned by proc2. 4507 4508 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4509 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4510 The 'M','N' parameters are 8,8, and have the same values on all procs. 4511 4512 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4513 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4514 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4515 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4516 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4517 matrix, and [DF] as another SeqAIJ matrix. 4518 4519 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4520 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4521 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4522 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4523 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4524 In this case, the values of `d_nz`,`o_nz` are 4525 .vb 4526 proc0 dnz = 2, o_nz = 2 4527 proc1 dnz = 3, o_nz = 2 4528 proc2 dnz = 1, o_nz = 4 4529 .ve 4530 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4531 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4532 for proc3. i.e we are using 12+15+10=37 storage locations to store 4533 34 values. 4534 4535 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4536 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 
4537 In the above case the values for d_nnz,o_nnz are 4538 .vb 4539 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4540 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4541 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4542 .ve 4543 Here the space allocated is sum of all the above values i.e 34, and 4544 hence pre-allocation is perfect. 4545 4546 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4547 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4548 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4549 @*/ 4550 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4551 { 4552 PetscMPIInt size; 4553 4554 PetscFunctionBegin; 4555 PetscCall(MatCreate(comm, A)); 4556 PetscCall(MatSetSizes(*A, m, n, M, N)); 4557 PetscCallMPI(MPI_Comm_size(comm, &size)); 4558 if (size > 1) { 4559 PetscCall(MatSetType(*A, MATMPIAIJ)); 4560 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4561 } else { 4562 PetscCall(MatSetType(*A, MATSEQAIJ)); 4563 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4564 } 4565 PetscFunctionReturn(PETSC_SUCCESS); 4566 } 4567 4568 /*MC 4569 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4570 4571 Synopsis: 4572 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4573 4574 Not Collective 4575 4576 Input Parameter: 4577 . A - the `MATMPIAIJ` matrix 4578 4579 Output Parameters: 4580 + Ad - the diagonal portion of the matrix 4581 . Ao - the off-diagonal portion of the matrix 4582 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4583 - ierr - error code 4584 4585 Level: advanced 4586 4587 Note: 4588 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4589 4590 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4591 M*/ 4592 4593 /*MC 4594 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4595 4596 Synopsis: 4597 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4598 4599 Not Collective 4600 4601 Input Parameters: 4602 + A - the `MATMPIAIJ` matrix 4603 . Ad - the diagonal portion of the matrix 4604 . Ao - the off-diagonal portion of the matrix 4605 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4606 - ierr - error code 4607 4608 Level: advanced 4609 4610 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4611 M*/ 4612 4613 /*@C 4614 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4615 4616 Not Collective 4617 4618 Input Parameter: 4619 . A - The `MATMPIAIJ` matrix 4620 4621 Output Parameters: 4622 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4623 . 
Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4624 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4625 4626 Level: intermediate 4627 4628 Note: 4629 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4630 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4631 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4632 local column numbers to global column numbers in the original matrix. 4633 4634 Fortran Notes: 4635 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4636 4637 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4638 @*/ 4639 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4640 { 4641 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4642 PetscBool flg; 4643 4644 PetscFunctionBegin; 4645 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4646 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4647 if (Ad) *Ad = a->A; 4648 if (Ao) *Ao = a->B; 4649 if (colmap) *colmap = a->garray; 4650 PetscFunctionReturn(PETSC_SUCCESS); 4651 } 4652 4653 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4654 { 4655 PetscInt m, N, i, rstart, nnz, Ii; 4656 PetscInt *indx; 4657 PetscScalar *values; 4658 MatType rootType; 4659 4660 PetscFunctionBegin; 4661 PetscCall(MatGetSize(inmat, &m, &N)); 4662 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4663 PetscInt *dnz, *onz, sum, bs, cbs; 4664 4665 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4666 /* Check sum(n) = N */ 4667 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4668 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4669 4670 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4671 rstart -= m; 4672 4673 MatPreallocateBegin(comm, m, n, dnz, onz); 4674 for (i = 0; i < m; i++) { 4675 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4676 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4677 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4678 } 4679 4680 PetscCall(MatCreate(comm, outmat)); 4681 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4682 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4683 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4684 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4685 PetscCall(MatSetType(*outmat, rootType)); 4686 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4687 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4688 MatPreallocateEnd(dnz, onz); 4689 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4690 } 4691 4692 /* numeric phase */ 4693 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4694 for (i = 0; i < m; i++) { 4695 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4696 Ii = i + rstart; 4697 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4698 
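    /* the rows of inmat are copied into the locally owned rows [rstart, rstart + m) of *outmat,
       so these insertions never generate off-process entries (MAT_NO_OFF_PROC_ENTRIES is set in
       the symbolic phase above) */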
PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4699 } 4700 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4701 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4702 PetscFunctionReturn(PETSC_SUCCESS); 4703 } 4704 4705 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4706 { 4707 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4708 4709 PetscFunctionBegin; 4710 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4711 PetscCall(PetscFree(merge->id_r)); 4712 PetscCall(PetscFree(merge->len_s)); 4713 PetscCall(PetscFree(merge->len_r)); 4714 PetscCall(PetscFree(merge->bi)); 4715 PetscCall(PetscFree(merge->bj)); 4716 PetscCall(PetscFree(merge->buf_ri[0])); 4717 PetscCall(PetscFree(merge->buf_ri)); 4718 PetscCall(PetscFree(merge->buf_rj[0])); 4719 PetscCall(PetscFree(merge->buf_rj)); 4720 PetscCall(PetscFree(merge->coi)); 4721 PetscCall(PetscFree(merge->coj)); 4722 PetscCall(PetscFree(merge->owners_co)); 4723 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4724 PetscCall(PetscFree(merge)); 4725 PetscFunctionReturn(PETSC_SUCCESS); 4726 } 4727 4728 #include <../src/mat/utils/freespace.h> 4729 #include <petscbt.h> 4730 4731 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4732 { 4733 MPI_Comm comm; 4734 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4735 PetscMPIInt size, rank, taga, *len_s; 4736 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4737 PetscMPIInt proc, k; 4738 PetscInt **buf_ri, **buf_rj; 4739 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4740 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4741 MPI_Request *s_waits, *r_waits; 4742 MPI_Status *status; 4743 const MatScalar *aa, *a_a; 4744 MatScalar **abuf_r, *ba_i; 4745 Mat_Merge_SeqsToMPI *merge; 4746 PetscContainer container; 4747 4748 PetscFunctionBegin; 4749 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4750 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4751 4752 PetscCallMPI(MPI_Comm_size(comm, &size)); 4753 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4754 4755 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4756 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4757 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4758 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4759 aa = a_a; 4760 4761 bi = merge->bi; 4762 bj = merge->bj; 4763 buf_ri = merge->buf_ri; 4764 buf_rj = merge->buf_rj; 4765 4766 PetscCall(PetscMalloc1(size, &status)); 4767 owners = merge->rowmap->range; 4768 len_s = merge->len_s; 4769 4770 /* send and recv matrix values */ 4771 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4772 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4773 4774 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4775 for (proc = 0, k = 0; proc < size; proc++) { 4776 if (!len_s[proc]) continue; 4777 i = owners[proc]; 4778 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4779 k++; 4780 } 4781 4782 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4783 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4784 PetscCall(PetscFree(status)); 4785 4786 PetscCall(PetscFree(s_waits)); 4787 PetscCall(PetscFree(r_waits)); 4788 4789 /* insert mat values of mpimat */ 4790 PetscCall(PetscMalloc1(N, &ba_i)); 4791 
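  /* ba_i is a dense work buffer with one slot per global column; for each local row it accumulates
     the contributions from this process's seqmat and from the received messages before the row is
     inserted into mpimat with MatSetValues() */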
PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4792 4793 for (k = 0; k < merge->nrecv; k++) { 4794 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4795 nrows = *buf_ri_k[k]; 4796 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4797 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4798 } 4799 4800 /* set values of ba */ 4801 m = merge->rowmap->n; 4802 for (i = 0; i < m; i++) { 4803 arow = owners[rank] + i; 4804 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4805 bnzi = bi[i + 1] - bi[i]; 4806 PetscCall(PetscArrayzero(ba_i, bnzi)); 4807 4808 /* add local non-zero vals of this proc's seqmat into ba */ 4809 anzi = ai[arow + 1] - ai[arow]; 4810 aj = a->j + ai[arow]; 4811 aa = a_a + ai[arow]; 4812 nextaj = 0; 4813 for (j = 0; nextaj < anzi; j++) { 4814 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4815 ba_i[j] += aa[nextaj++]; 4816 } 4817 } 4818 4819 /* add received vals into ba */ 4820 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4821 /* i-th row */ 4822 if (i == *nextrow[k]) { 4823 anzi = *(nextai[k] + 1) - *nextai[k]; 4824 aj = buf_rj[k] + *nextai[k]; 4825 aa = abuf_r[k] + *nextai[k]; 4826 nextaj = 0; 4827 for (j = 0; nextaj < anzi; j++) { 4828 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4829 ba_i[j] += aa[nextaj++]; 4830 } 4831 } 4832 nextrow[k]++; 4833 nextai[k]++; 4834 } 4835 } 4836 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4837 } 4838 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4839 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4840 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4841 4842 PetscCall(PetscFree(abuf_r[0])); 4843 PetscCall(PetscFree(abuf_r)); 4844 PetscCall(PetscFree(ba_i)); 4845 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4846 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4847 PetscFunctionReturn(PETSC_SUCCESS); 4848 } 4849 4850 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4851 { 4852 Mat B_mpi; 4853 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4854 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4855 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4856 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4857 PetscInt len, *dnz, *onz, bs, cbs; 4858 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4859 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4860 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4861 MPI_Status *status; 4862 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4863 PetscBT lnkbt; 4864 Mat_Merge_SeqsToMPI *merge; 4865 PetscContainer container; 4866 4867 PetscFunctionBegin; 4868 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4869 4870 /* make sure it is a PETSc comm */ 4871 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4872 PetscCallMPI(MPI_Comm_size(comm, &size)); 4873 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4874 4875 PetscCall(PetscNew(&merge)); 4876 PetscCall(PetscMalloc1(size, &status)); 4877 4878 /* determine row ownership */ 4879 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4880 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4881 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4882 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4883 PetscCall(PetscLayoutSetUp(merge->rowmap)); 
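  /* merge->rowmap now fixes which rows of the summed matrix each process owns; its range[] array
     (aliased as owners[] below) determines which rows of the local seqmat are sent to which process */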
4884 PetscCall(PetscMalloc1(size, &len_si)); 4885 PetscCall(PetscMalloc1(size, &merge->len_s)); 4886 4887 m = merge->rowmap->n; 4888 owners = merge->rowmap->range; 4889 4890 /* determine the number of messages to send, their lengths */ 4891 len_s = merge->len_s; 4892 4893 len = 0; /* length of buf_si[] */ 4894 merge->nsend = 0; 4895 for (PetscMPIInt proc = 0; proc < size; proc++) { 4896 len_si[proc] = 0; 4897 if (proc == rank) { 4898 len_s[proc] = 0; 4899 } else { 4900 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4901 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4902 } 4903 if (len_s[proc]) { 4904 merge->nsend++; 4905 nrows = 0; 4906 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4907 if (ai[i + 1] > ai[i]) nrows++; 4908 } 4909 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4910 len += len_si[proc]; 4911 } 4912 } 4913 4914 /* determine the number and length of messages to receive for ij-structure */ 4915 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4916 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4917 4918 /* post the Irecv of j-structure */ 4919 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4920 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4921 4922 /* post the Isend of j-structure */ 4923 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4924 4925 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4926 if (!len_s[proc]) continue; 4927 i = owners[proc]; 4928 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4929 k++; 4930 } 4931 4932 /* receives and sends of j-structure are complete */ 4933 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4934 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4935 4936 /* send and recv i-structure */ 4937 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4938 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4939 4940 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4941 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4942 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4943 if (!len_s[proc]) continue; 4944 /* form outgoing message for i-structure: 4945 buf_si[0]: nrows to be sent 4946 [1:nrows]: row index (global) 4947 [nrows+1:2*nrows+1]: i-structure index 4948 */ 4949 nrows = len_si[proc] / 2 - 1; 4950 buf_si_i = buf_si + nrows + 1; 4951 buf_si[0] = nrows; 4952 buf_si_i[0] = 0; 4953 nrows = 0; 4954 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4955 anzi = ai[i + 1] - ai[i]; 4956 if (anzi) { 4957 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4958 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4959 nrows++; 4960 } 4961 } 4962 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4963 k++; 4964 buf_si += len_si[proc]; 4965 } 4966 4967 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4968 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4969 4970 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4971 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], 
merge->len_r[i], merge->id_r[i])); 4972 4973 PetscCall(PetscFree(len_si)); 4974 PetscCall(PetscFree(len_ri)); 4975 PetscCall(PetscFree(rj_waits)); 4976 PetscCall(PetscFree2(si_waits, sj_waits)); 4977 PetscCall(PetscFree(ri_waits)); 4978 PetscCall(PetscFree(buf_s)); 4979 PetscCall(PetscFree(status)); 4980 4981 /* compute a local seq matrix in each processor */ 4982 /* allocate bi array and free space for accumulating nonzero column info */ 4983 PetscCall(PetscMalloc1(m + 1, &bi)); 4984 bi[0] = 0; 4985 4986 /* create and initialize a linked list */ 4987 nlnk = N + 1; 4988 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4989 4990 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4991 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4992 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4993 4994 current_space = free_space; 4995 4996 /* determine symbolic info for each local row */ 4997 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4998 4999 for (k = 0; k < merge->nrecv; k++) { 5000 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5001 nrows = *buf_ri_k[k]; 5002 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5003 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5004 } 5005 5006 MatPreallocateBegin(comm, m, n, dnz, onz); 5007 len = 0; 5008 for (i = 0; i < m; i++) { 5009 bnzi = 0; 5010 /* add local non-zero cols of this proc's seqmat into lnk */ 5011 arow = owners[rank] + i; 5012 anzi = ai[arow + 1] - ai[arow]; 5013 aj = a->j + ai[arow]; 5014 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5015 bnzi += nlnk; 5016 /* add received col data into lnk */ 5017 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5018 if (i == *nextrow[k]) { /* i-th row */ 5019 anzi = *(nextai[k] + 1) - *nextai[k]; 5020 aj = buf_rj[k] + *nextai[k]; 5021 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5022 bnzi += nlnk; 5023 nextrow[k]++; 5024 nextai[k]++; 5025 } 5026 } 5027 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5028 5029 /* if free space is not available, make more free space */ 5030 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5031 /* copy data into free space, then initialize lnk */ 5032 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5033 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5034 5035 current_space->array += bnzi; 5036 current_space->local_used += bnzi; 5037 current_space->local_remaining -= bnzi; 5038 5039 bi[i + 1] = bi[i] + bnzi; 5040 } 5041 5042 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5043 5044 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5045 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5046 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5047 5048 /* create symbolic parallel matrix B_mpi */ 5049 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5050 PetscCall(MatCreate(comm, &B_mpi)); 5051 if (n == PETSC_DECIDE) { 5052 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5053 } else { 5054 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5055 } 5056 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5057 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5058 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5059 MatPreallocateEnd(dnz, onz); 5060 PetscCall(MatSetOption(B_mpi, 
MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5061 5062 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5063 B_mpi->assembled = PETSC_FALSE; 5064 merge->bi = bi; 5065 merge->bj = bj; 5066 merge->buf_ri = buf_ri; 5067 merge->buf_rj = buf_rj; 5068 merge->coi = NULL; 5069 merge->coj = NULL; 5070 merge->owners_co = NULL; 5071 5072 PetscCall(PetscCommDestroy(&comm)); 5073 5074 /* attach the supporting struct to B_mpi for reuse */ 5075 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5076 PetscCall(PetscContainerSetPointer(container, merge)); 5077 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5078 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5079 PetscCall(PetscContainerDestroy(&container)); 5080 *mpimat = B_mpi; 5081 5082 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5083 PetscFunctionReturn(PETSC_SUCCESS); 5084 } 5085 5086 /*@ 5087 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5088 matrices from each processor 5089 5090 Collective 5091 5092 Input Parameters: 5093 + comm - the communicators the parallel matrix will live on 5094 . seqmat - the input sequential matrices 5095 . m - number of local rows (or `PETSC_DECIDE`) 5096 . n - number of local columns (or `PETSC_DECIDE`) 5097 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5098 5099 Output Parameter: 5100 . mpimat - the parallel matrix generated 5101 5102 Level: advanced 5103 5104 Note: 5105 The dimensions of the sequential matrix in each processor MUST be the same. 5106 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5107 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5108 5109 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5110 @*/ 5111 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5112 { 5113 PetscMPIInt size; 5114 5115 PetscFunctionBegin; 5116 PetscCallMPI(MPI_Comm_size(comm, &size)); 5117 if (size == 1) { 5118 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5119 if (scall == MAT_INITIAL_MATRIX) { 5120 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5121 } else { 5122 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5123 } 5124 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5125 PetscFunctionReturn(PETSC_SUCCESS); 5126 } 5127 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5128 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5129 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5130 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5131 PetscFunctionReturn(PETSC_SUCCESS); 5132 } 5133 5134 /*@ 5135 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5136 5137 Not Collective 5138 5139 Input Parameter: 5140 . A - the matrix 5141 5142 Output Parameter: 5143 . A_loc - the local sequential matrix generated 5144 5145 Level: developer 5146 5147 Notes: 5148 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5149 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5150 `n` is the global column count obtained with `MatGetSize()` 5151 5152 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
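  A minimal usage sketch (it assumes `A` is an already assembled `MATAIJ` matrix):
.vb
  Mat A_loc;

  MatAIJGetLocalMat(A, &A_loc);
  /* ... use A_loc as an ordinary sequential matrix ... */
  MatDestroy(&A_loc);
.ve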
5153 5154 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5155 5156 Destroy the matrix with `MatDestroy()` 5157 5158 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5159 @*/ 5160 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5161 { 5162 PetscBool mpi; 5163 5164 PetscFunctionBegin; 5165 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5166 if (mpi) { 5167 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5168 } else { 5169 *A_loc = A; 5170 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5171 } 5172 PetscFunctionReturn(PETSC_SUCCESS); 5173 } 5174 5175 /*@ 5176 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5177 5178 Not Collective 5179 5180 Input Parameters: 5181 + A - the matrix 5182 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5183 5184 Output Parameter: 5185 . A_loc - the local sequential matrix generated 5186 5187 Level: developer 5188 5189 Notes: 5190 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5191 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5192 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5193 5194 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5195 5196 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5197 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5198 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5199 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
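  A sketch of the create-then-reuse pattern described above (it assumes the nonzero pattern of `A` does not change between the two calls):
.vb
  Mat A_loc;

  MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc); /* build the local matrix */
  /* ... update the numerical values of A ... */
  MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);   /* refresh only the values */
  MatDestroy(&A_loc);
.ve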
5200 5201 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5202 @*/ 5203 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5204 { 5205 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5206 Mat_SeqAIJ *mat, *a, *b; 5207 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5208 const PetscScalar *aa, *ba, *aav, *bav; 5209 PetscScalar *ca, *cam; 5210 PetscMPIInt size; 5211 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5212 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5213 PetscBool match; 5214 5215 PetscFunctionBegin; 5216 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5217 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5218 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5219 if (size == 1) { 5220 if (scall == MAT_INITIAL_MATRIX) { 5221 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5222 *A_loc = mpimat->A; 5223 } else if (scall == MAT_REUSE_MATRIX) { 5224 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5225 } 5226 PetscFunctionReturn(PETSC_SUCCESS); 5227 } 5228 5229 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5230 a = (Mat_SeqAIJ *)mpimat->A->data; 5231 b = (Mat_SeqAIJ *)mpimat->B->data; 5232 ai = a->i; 5233 aj = a->j; 5234 bi = b->i; 5235 bj = b->j; 5236 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5237 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5238 aa = aav; 5239 ba = bav; 5240 if (scall == MAT_INITIAL_MATRIX) { 5241 PetscCall(PetscMalloc1(1 + am, &ci)); 5242 ci[0] = 0; 5243 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5244 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5245 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5246 k = 0; 5247 for (i = 0; i < am; i++) { 5248 ncols_o = bi[i + 1] - bi[i]; 5249 ncols_d = ai[i + 1] - ai[i]; 5250 /* off-diagonal portion of A */ 5251 for (jo = 0; jo < ncols_o; jo++) { 5252 col = cmap[*bj]; 5253 if (col >= cstart) break; 5254 cj[k] = col; 5255 bj++; 5256 ca[k++] = *ba++; 5257 } 5258 /* diagonal portion of A */ 5259 for (j = 0; j < ncols_d; j++) { 5260 cj[k] = cstart + *aj++; 5261 ca[k++] = *aa++; 5262 } 5263 /* off-diagonal portion of A */ 5264 for (j = jo; j < ncols_o; j++) { 5265 cj[k] = cmap[*bj++]; 5266 ca[k++] = *ba++; 5267 } 5268 } 5269 /* put together the new matrix */ 5270 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5271 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5272 /* Since these are PETSc arrays, change flags to free them as necessary. 
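       (setting free_a and free_ij lets MatDestroy() release ci, cj, and ca, while nonew = 0 restores
       the default behaviour that allows new nonzeros to be inserted later)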
*/ 5273 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5274 mat->free_a = PETSC_TRUE; 5275 mat->free_ij = PETSC_TRUE; 5276 mat->nonew = 0; 5277 } else if (scall == MAT_REUSE_MATRIX) { 5278 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5279 ci = mat->i; 5280 cj = mat->j; 5281 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5282 for (i = 0; i < am; i++) { 5283 /* off-diagonal portion of A */ 5284 ncols_o = bi[i + 1] - bi[i]; 5285 for (jo = 0; jo < ncols_o; jo++) { 5286 col = cmap[*bj]; 5287 if (col >= cstart) break; 5288 *cam++ = *ba++; 5289 bj++; 5290 } 5291 /* diagonal portion of A */ 5292 ncols_d = ai[i + 1] - ai[i]; 5293 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5294 /* off-diagonal portion of A */ 5295 for (j = jo; j < ncols_o; j++) { 5296 *cam++ = *ba++; 5297 bj++; 5298 } 5299 } 5300 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5301 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5302 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5303 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5304 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5305 PetscFunctionReturn(PETSC_SUCCESS); 5306 } 5307 5308 /*@ 5309 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5310 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5311 5312 Not Collective 5313 5314 Input Parameters: 5315 + A - the matrix 5316 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5317 5318 Output Parameters: 5319 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5320 - A_loc - the local sequential matrix generated 5321 5322 Level: developer 5323 5324 Note: 5325 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5326 part, then those associated with the off-diagonal part (in its local ordering) 5327 5328 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5329 @*/ 5330 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5331 { 5332 Mat Ao, Ad; 5333 const PetscInt *cmap; 5334 PetscMPIInt size; 5335 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5336 5337 PetscFunctionBegin; 5338 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5339 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5340 if (size == 1) { 5341 if (scall == MAT_INITIAL_MATRIX) { 5342 PetscCall(PetscObjectReference((PetscObject)Ad)); 5343 *A_loc = Ad; 5344 } else if (scall == MAT_REUSE_MATRIX) { 5345 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5346 } 5347 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5348 PetscFunctionReturn(PETSC_SUCCESS); 5349 } 5350 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5351 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5352 if (f) { 5353 PetscCall((*f)(A, scall, glob, A_loc)); 5354 } else { 5355 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5356 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5357 Mat_SeqAIJ *c; 5358 PetscInt *ai = a->i, *aj = a->j; 5359 PetscInt *bi = b->i, *bj = b->j; 5360 PetscInt *ci, *cj; 5361 const PetscScalar *aa, *ba; 5362 PetscScalar *ca; 5363 PetscInt i, j, am, dn, on; 5364 5365 
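    /* default path: build the merged local matrix directly from the CSR data of Ad and Ao;
       columns [0, dn) come from the diagonal block and columns [dn, dn + on) come from the
       off-diagonal block in its local ordering */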
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5366 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5367 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5368 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5369 if (scall == MAT_INITIAL_MATRIX) { 5370 PetscInt k; 5371 PetscCall(PetscMalloc1(1 + am, &ci)); 5372 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5373 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5374 ci[0] = 0; 5375 for (i = 0, k = 0; i < am; i++) { 5376 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5377 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5378 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5379 /* diagonal portion of A */ 5380 for (j = 0; j < ncols_d; j++, k++) { 5381 cj[k] = *aj++; 5382 ca[k] = *aa++; 5383 } 5384 /* off-diagonal portion of A */ 5385 for (j = 0; j < ncols_o; j++, k++) { 5386 cj[k] = dn + *bj++; 5387 ca[k] = *ba++; 5388 } 5389 } 5390 /* put together the new matrix */ 5391 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5392 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5393 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5394 c = (Mat_SeqAIJ *)(*A_loc)->data; 5395 c->free_a = PETSC_TRUE; 5396 c->free_ij = PETSC_TRUE; 5397 c->nonew = 0; 5398 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5399 } else if (scall == MAT_REUSE_MATRIX) { 5400 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5401 for (i = 0; i < am; i++) { 5402 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5403 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5404 /* diagonal portion of A */ 5405 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5406 /* off-diagonal portion of A */ 5407 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5408 } 5409 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5410 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5411 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5412 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5413 if (glob) { 5414 PetscInt cst, *gidx; 5415 5416 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5417 PetscCall(PetscMalloc1(dn + on, &gidx)); 5418 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5419 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5420 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5421 } 5422 } 5423 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5424 PetscFunctionReturn(PETSC_SUCCESS); 5425 } 5426 5427 /*@C 5428 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5429 5430 Not Collective 5431 5432 Input Parameters: 5433 + A - the matrix 5434 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5435 . row - index set of rows to extract (or `NULL`) 5436 - col - index set of columns to extract (or `NULL`) 5437 5438 Output Parameter: 5439 . 
A_loc - the local sequential matrix generated 5440 5441 Level: developer 5442 5443 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5444 @*/ 5445 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5446 { 5447 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5448 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5449 IS isrowa, iscola; 5450 Mat *aloc; 5451 PetscBool match; 5452 5453 PetscFunctionBegin; 5454 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5455 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5456 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5457 if (!row) { 5458 start = A->rmap->rstart; 5459 end = A->rmap->rend; 5460 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5461 } else { 5462 isrowa = *row; 5463 } 5464 if (!col) { 5465 start = A->cmap->rstart; 5466 cmap = a->garray; 5467 nzA = a->A->cmap->n; 5468 nzB = a->B->cmap->n; 5469 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5470 ncols = 0; 5471 for (i = 0; i < nzB; i++) { 5472 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5473 else break; 5474 } 5475 imark = i; 5476 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5477 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5478 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5479 } else { 5480 iscola = *col; 5481 } 5482 if (scall != MAT_INITIAL_MATRIX) { 5483 PetscCall(PetscMalloc1(1, &aloc)); 5484 aloc[0] = *A_loc; 5485 } 5486 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5487 if (!col) { /* attach global id of condensed columns */ 5488 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5489 } 5490 *A_loc = aloc[0]; 5491 PetscCall(PetscFree(aloc)); 5492 if (!row) PetscCall(ISDestroy(&isrowa)); 5493 if (!col) PetscCall(ISDestroy(&iscola)); 5494 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5495 PetscFunctionReturn(PETSC_SUCCESS); 5496 } 5497 5498 /* 5499 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5500 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5501 * on a global size. 
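 * Two star forests are built over the nonzeros of P's diagonal and off-diagonal blocks; they move the column
 * indices and numerical values of the requested rows into *P_oth, and are attached to *P_oth (as "diagsf" and
 * "offdiagsf") so that later calls can refresh the values without repeating the symbolic setup.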
5502 * */ 5503 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5504 { 5505 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5506 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5507 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5508 PetscMPIInt owner; 5509 PetscSFNode *iremote, *oiremote; 5510 const PetscInt *lrowindices; 5511 PetscSF sf, osf; 5512 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5513 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5514 MPI_Comm comm; 5515 ISLocalToGlobalMapping mapping; 5516 const PetscScalar *pd_a, *po_a; 5517 5518 PetscFunctionBegin; 5519 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5520 /* plocalsize is the number of roots 5521 * nrows is the number of leaves 5522 * */ 5523 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5524 PetscCall(ISGetLocalSize(rows, &nrows)); 5525 PetscCall(PetscCalloc1(nrows, &iremote)); 5526 PetscCall(ISGetIndices(rows, &lrowindices)); 5527 for (i = 0; i < nrows; i++) { 5528 /* Find a remote index and an owner for a row 5529 * The row could be local or remote 5530 * */ 5531 owner = 0; 5532 lidx = 0; 5533 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5534 iremote[i].index = lidx; 5535 iremote[i].rank = owner; 5536 } 5537 /* Create SF to communicate how many nonzero columns for each row */ 5538 PetscCall(PetscSFCreate(comm, &sf)); 5539 /* SF will figure out the number of nonzero columns for each row, and their 5540 * offsets 5541 * */ 5542 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5543 PetscCall(PetscSFSetFromOptions(sf)); 5544 PetscCall(PetscSFSetUp(sf)); 5545 5546 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5547 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5548 PetscCall(PetscCalloc1(nrows, &pnnz)); 5549 roffsets[0] = 0; 5550 roffsets[1] = 0; 5551 for (i = 0; i < plocalsize; i++) { 5552 /* diagonal */ 5553 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5554 /* off-diagonal */ 5555 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5556 /* compute offsets so that we relative location for each row */ 5557 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5558 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5559 } 5560 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5561 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5562 /* 'r' means root, and 'l' means leaf */ 5563 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5564 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5565 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5566 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5567 PetscCall(PetscSFDestroy(&sf)); 5568 PetscCall(PetscFree(roffsets)); 5569 PetscCall(PetscFree(nrcols)); 5570 dntotalcols = 0; 5571 ontotalcols = 0; 5572 ncol = 0; 5573 for (i = 0; i < nrows; i++) { 5574 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5575 ncol = PetscMax(pnnz[i], ncol); 5576 /* diagonal */ 5577 dntotalcols += nlcols[i * 2 + 0]; 5578 /* off-diagonal */ 5579 ontotalcols += nlcols[i * 2 + 1]; 5580 } 5581 /* We do not need to figure the right number of columns 5582 * since all the calculations will be done by going through the raw data 5583 * */ 5584 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5585 PetscCall(MatSetUp(*P_oth)); 5586 
PetscCall(PetscFree(pnnz)); 5587 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5588 /* diagonal */ 5589 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5590 /* off-diagonal */ 5591 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5592 /* diagonal */ 5593 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5594 /* off-diagonal */ 5595 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5596 dntotalcols = 0; 5597 ontotalcols = 0; 5598 ntotalcols = 0; 5599 for (i = 0; i < nrows; i++) { 5600 owner = 0; 5601 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5602 /* Set iremote for diag matrix */ 5603 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5604 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5605 iremote[dntotalcols].rank = owner; 5606 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5607 ilocal[dntotalcols++] = ntotalcols++; 5608 } 5609 /* off-diagonal */ 5610 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5611 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5612 oiremote[ontotalcols].rank = owner; 5613 oilocal[ontotalcols++] = ntotalcols++; 5614 } 5615 } 5616 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5617 PetscCall(PetscFree(loffsets)); 5618 PetscCall(PetscFree(nlcols)); 5619 PetscCall(PetscSFCreate(comm, &sf)); 5620 /* P serves as roots and P_oth is leaves 5621 * Diag matrix 5622 * */ 5623 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5624 PetscCall(PetscSFSetFromOptions(sf)); 5625 PetscCall(PetscSFSetUp(sf)); 5626 5627 PetscCall(PetscSFCreate(comm, &osf)); 5628 /* off-diagonal */ 5629 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5630 PetscCall(PetscSFSetFromOptions(osf)); 5631 PetscCall(PetscSFSetUp(osf)); 5632 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5633 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5634 /* operate on the matrix internal data to save memory */ 5635 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5636 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5637 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5638 /* Convert to global indices for diag matrix */ 5639 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5640 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5641 /* We want P_oth store global indices */ 5642 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5643 /* Use memory scalable approach */ 5644 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5645 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5646 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5647 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5648 /* Convert back to local indices */ 5649 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5650 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5651 nout = 0; 5652 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5653 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5654 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5655 /* Exchange values */ 5656 PetscCall(PetscSFBcastEnd(sf, 
MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5657 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5658 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5659 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5660 /* Stop PETSc from shrinking memory */ 5661 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5662 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5663 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5664 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5665 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5666 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5667 PetscCall(PetscSFDestroy(&sf)); 5668 PetscCall(PetscSFDestroy(&osf)); 5669 PetscFunctionReturn(PETSC_SUCCESS); 5670 } 5671 5672 /* 5673 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5674 * This supports MPIAIJ and MAIJ 5675 * */ 5676 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5677 { 5678 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5679 Mat_SeqAIJ *p_oth; 5680 IS rows, map; 5681 PetscHMapI hamp; 5682 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5683 MPI_Comm comm; 5684 PetscSF sf, osf; 5685 PetscBool has; 5686 5687 PetscFunctionBegin; 5688 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5689 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5690 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5691 * and then create a submatrix (that often is an overlapping matrix) 5692 * */ 5693 if (reuse == MAT_INITIAL_MATRIX) { 5694 /* Use a hash table to figure out unique keys */ 5695 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5696 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5697 count = 0; 5698 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5699 for (i = 0; i < a->B->cmap->n; i++) { 5700 key = a->garray[i] / dof; 5701 PetscCall(PetscHMapIHas(hamp, key, &has)); 5702 if (!has) { 5703 mapping[i] = count; 5704 PetscCall(PetscHMapISet(hamp, key, count++)); 5705 } else { 5706 /* Current 'i' has the same value the previous step */ 5707 mapping[i] = count - 1; 5708 } 5709 } 5710 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5711 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5712 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5713 PetscCall(PetscCalloc1(htsize, &rowindices)); 5714 off = 0; 5715 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5716 PetscCall(PetscHMapIDestroy(&hamp)); 5717 PetscCall(PetscSortInt(htsize, rowindices)); 5718 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5719 /* In case, the matrix was already created but users want to recreate the matrix */ 5720 PetscCall(MatDestroy(P_oth)); 5721 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5722 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5723 PetscCall(ISDestroy(&map)); 5724 PetscCall(ISDestroy(&rows)); 5725 } else if (reuse == MAT_REUSE_MATRIX) { 5726 /* If matrix was already created, we simply update values using SF objects 5727 * that as attached to the matrix earlier. 
5728 */ 5729 const PetscScalar *pd_a, *po_a; 5730 5731 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5732 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5733 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5734 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5735 /* Update values in place */ 5736 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5737 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5738 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5739 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5740 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5741 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5742 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5743 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5744 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5745 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5746 PetscFunctionReturn(PETSC_SUCCESS); 5747 } 5748 5749 /*@C 5750 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5751 5752 Collective 5753 5754 Input Parameters: 5755 + A - the first matrix in `MATMPIAIJ` format 5756 . B - the second matrix in `MATMPIAIJ` format 5757 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5758 5759 Output Parameters: 5760 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5761 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5762 - B_seq - the sequential matrix generated 5763 5764 Level: developer 5765 5766 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5767 @*/ 5768 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5769 { 5770 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5771 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5772 IS isrowb, iscolb; 5773 Mat *bseq = NULL; 5774 5775 PetscFunctionBegin; 5776 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5777 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5778 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5779 5780 if (scall == MAT_INITIAL_MATRIX) { 5781 start = A->cmap->rstart; 5782 cmap = a->garray; 5783 nzA = a->A->cmap->n; 5784 nzB = a->B->cmap->n; 5785 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5786 ncols = 0; 5787 for (i = 0; i < nzB; i++) { /* row < local row index */ 5788 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5789 else break; 5790 } 5791 imark = i; 5792 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5793 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5794 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5795 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5796 } else { 5797 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5798 isrowb = *rowb; 5799 iscolb = *colb; 5800 PetscCall(PetscMalloc1(1, &bseq)); 5801 bseq[0] = *B_seq; 5802 } 5803 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5804 *B_seq = bseq[0]; 5805 PetscCall(PetscFree(bseq)); 
5806 if (!rowb) { 5807 PetscCall(ISDestroy(&isrowb)); 5808 } else { 5809 *rowb = isrowb; 5810 } 5811 if (!colb) { 5812 PetscCall(ISDestroy(&iscolb)); 5813 } else { 5814 *colb = iscolb; 5815 } 5816 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5817 PetscFunctionReturn(PETSC_SUCCESS); 5818 } 5819 5820 /* 5821 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5822 of the OFF-DIAGONAL portion of local A 5823 5824 Collective 5825 5826 Input Parameters: 5827 + A,B - the matrices in `MATMPIAIJ` format 5828 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5829 5830 Output Parameter: 5831 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5832 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5833 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5834 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5835 5836 Developer Note: 5837 This directly accesses information inside the VecScatter associated with the matrix-vector product 5838 for this matrix. This is not desirable.. 5839 5840 Level: developer 5841 5842 */ 5843 5844 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5845 { 5846 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5847 VecScatter ctx; 5848 MPI_Comm comm; 5849 const PetscMPIInt *rprocs, *sprocs; 5850 PetscMPIInt nrecvs, nsends; 5851 const PetscInt *srow, *rstarts, *sstarts; 5852 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5853 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5854 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5855 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5856 PetscMPIInt size, tag, rank, nreqs; 5857 5858 PetscFunctionBegin; 5859 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5860 PetscCallMPI(MPI_Comm_size(comm, &size)); 5861 5862 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5863 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5864 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5865 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5866 5867 if (size == 1) { 5868 startsj_s = NULL; 5869 bufa_ptr = NULL; 5870 *B_oth = NULL; 5871 PetscFunctionReturn(PETSC_SUCCESS); 5872 } 5873 5874 ctx = a->Mvctx; 5875 tag = ((PetscObject)ctx)->tag; 5876 5877 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5878 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5879 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5880 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5881 PetscCall(PetscMalloc1(nreqs, &reqs)); 5882 rwaits = reqs; 5883 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5884 5885 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5886 if (scall == MAT_INITIAL_MATRIX) { 5887 /* i-array */ 5888 /* post receives */ 5889 if (nrecvs) PetscCall(PetscMalloc1(rbs * 
(rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5890 for (i = 0; i < nrecvs; i++) { 5891 rowlen = rvalues + rstarts[i] * rbs; 5892 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5893 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5894 } 5895 5896 /* pack the outgoing message */ 5897 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5898 5899 sstartsj[0] = 0; 5900 rstartsj[0] = 0; 5901 len = 0; /* total length of j or a array to be sent */ 5902 if (nsends) { 5903 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5904 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5905 } 5906 for (i = 0; i < nsends; i++) { 5907 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5908 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5909 for (j = 0; j < nrows; j++) { 5910 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5911 for (l = 0; l < sbs; l++) { 5912 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5913 5914 rowlen[j * sbs + l] = ncols; 5915 5916 len += ncols; 5917 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5918 } 5919 k++; 5920 } 5921 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5922 5923 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5924 } 5925 /* recvs and sends of i-array are completed */ 5926 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5927 PetscCall(PetscFree(svalues)); 5928 5929 /* allocate buffers for sending j and a arrays */ 5930 PetscCall(PetscMalloc1(len + 1, &bufj)); 5931 PetscCall(PetscMalloc1(len + 1, &bufa)); 5932 5933 /* create i-array of B_oth */ 5934 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5935 5936 b_othi[0] = 0; 5937 len = 0; /* total length of j or a array to be received */ 5938 k = 0; 5939 for (i = 0; i < nrecvs; i++) { 5940 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5941 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5942 for (j = 0; j < nrows; j++) { 5943 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5944 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5945 k++; 5946 } 5947 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5948 } 5949 PetscCall(PetscFree(rvalues)); 5950 5951 /* allocate space for j and a arrays of B_oth */ 5952 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5953 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5954 5955 /* j-array */ 5956 /* post receives of j-array */ 5957 for (i = 0; i < nrecvs; i++) { 5958 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5959 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5960 } 5961 5962 /* pack the outgoing message j-array */ 5963 if (nsends) k = sstarts[0]; 5964 for (i = 0; i < nsends; i++) { 5965 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5966 bufJ = bufj + sstartsj[i]; 5967 for (j = 0; j < nrows; j++) { 5968 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5969 for (ll = 0; ll < sbs; ll++) { 5970 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5971 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5972 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5973 } 5974 } 5975 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - 
sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5976 } 5977 5978 /* recvs and sends of j-array are completed */ 5979 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5980 } else if (scall == MAT_REUSE_MATRIX) { 5981 sstartsj = *startsj_s; 5982 rstartsj = *startsj_r; 5983 bufa = *bufa_ptr; 5984 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5985 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Unknown reuse type"); 5986 5987 /* a-array */ 5988 /* post receives of a-array */ 5989 for (i = 0; i < nrecvs; i++) { 5990 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5991 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5992 } 5993 5994 /* pack the outgoing message a-array */ 5995 if (nsends) k = sstarts[0]; 5996 for (i = 0; i < nsends; i++) { 5997 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5998 bufA = bufa + sstartsj[i]; 5999 for (j = 0; j < nrows; j++) { 6000 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6001 for (ll = 0; ll < sbs; ll++) { 6002 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6003 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6004 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6005 } 6006 } 6007 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6008 } 6009 /* recvs and sends of a-array are completed */ 6010 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6011 PetscCall(PetscFree(reqs)); 6012 6013 if (scall == MAT_INITIAL_MATRIX) { 6014 Mat_SeqAIJ *b_oth; 6015 6016 /* put together the new matrix */ 6017 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6018 6019 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6020 /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 6021 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6022 b_oth->free_a = PETSC_TRUE; 6023 b_oth->free_ij = PETSC_TRUE; 6024 b_oth->nonew = 0; 6025 6026 PetscCall(PetscFree(bufj)); 6027 if (!startsj_s || !bufa_ptr) { 6028 PetscCall(PetscFree2(sstartsj, rstartsj)); 6029 PetscCall(PetscFree(bufa_ptr)); 6030 } else { 6031 *startsj_s = sstartsj; 6032 *startsj_r = rstartsj; 6033 *bufa_ptr = bufa; 6034 } 6035 } else if (scall == MAT_REUSE_MATRIX) { 6036 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6037 } 6038 6039 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6040 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6041 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6042 PetscFunctionReturn(PETSC_SUCCESS); 6043 } 6044 6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6048 #if defined(PETSC_HAVE_MKL_SPARSE) 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6050 #endif 6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6053 #if defined(PETSC_HAVE_ELEMENTAL) 6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6055 #endif 6056 #if defined(PETSC_HAVE_SCALAPACK) 6057 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6058 #endif 6059 #if defined(PETSC_HAVE_HYPRE) 6060 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6061 #endif 6062 #if defined(PETSC_HAVE_CUDA) 6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6064 #endif 6065 #if defined(PETSC_HAVE_HIP) 6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6067 #endif 6068 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6070 #endif 6071 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6072 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6073 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6074 6075 /* 6076 Computes (B'*A')' since computing B*A directly is untenable 6077 6078 n p p 6079 [ ] [ ] [ ] 6080 m [ A ] * n [ B ] = m [ C ] 6081 [ ] [ ] [ ] 6082 6083 */ 6084 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6085 { 6086 Mat At, Bt, Ct; 6087 6088 PetscFunctionBegin; 6089 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6090 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6091 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6092 PetscCall(MatDestroy(&At)); 6093 PetscCall(MatDestroy(&Bt)); 6094 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6095 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6096 PetscCall(MatDestroy(&Ct)); 6097 PetscFunctionReturn(PETSC_SUCCESS); 6098 } 6099 6100 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6101 { 6102 PetscBool cisdense; 6103 6104 PetscFunctionBegin; 6105 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6106 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6107 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6108 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6109 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6110 PetscCall(MatSetUp(C)); 6111 6112 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6113 PetscFunctionReturn(PETSC_SUCCESS); 6114 } 6115 6116 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6117 { 6118 Mat_Product *product = C->product; 6119 Mat A = product->A, B = product->B; 6120 6121 PetscFunctionBegin; 6122 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6123 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6124 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6125 C->ops->productsymbolic = MatProductSymbolic_AB; 6126 PetscFunctionReturn(PETSC_SUCCESS); 6127 } 6128 6129 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6130 { 6131 Mat_Product *product = C->product; 6132 6133 PetscFunctionBegin; 6134 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6135 PetscFunctionReturn(PETSC_SUCCESS); 6136 } 6137 6138 /* 6139 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6140 6141 Input Parameters: 6142 6143 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6144 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6145 6146 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6147 6148 For Set1, j1[] contains column indices of the nonzeros. 6149 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6150 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6151 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6152 6153 Similarly for Set2. 6154 6155 This routine merges the two sets of nonzeros row by row and removes repeats. 6156 6157 Output Parameters: (memory is allocated by the caller) 6158 6159 i[],j[]: the CSR of the merged matrix, which has m rows. 6160 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix. 6161 imap2[]: similar to imap1[], but for Set2. 6162 Note we order nonzeros row-by-row and from left to right.
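  Example (an illustrative sketch added for clarity, with m = 1, i.e., a single row):
    j1 = [1,1,4],   rowBegin1 = [0], rowEnd1 = [3], jmap1 = [0,2,3]
    j2 = [2,4,4,7], rowBegin2 = [0], rowEnd2 = [4], jmap2 = [0,1,3,4]
  then the merged CSR and the maps are
    i = [0,4], j = [1,2,4,7], imap1 = [0,2], imap2 = [1,2,3]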
6163 */ 6164 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6165 { 6166 PetscInt r, m; /* Row index of mat */ 6167 PetscCount t, t1, t2, b1, e1, b2, e2; 6168 6169 PetscFunctionBegin; 6170 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6171 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6172 i[0] = 0; 6173 for (r = 0; r < m; r++) { /* Do row by row merging */ 6174 b1 = rowBegin1[r]; 6175 e1 = rowEnd1[r]; 6176 b2 = rowBegin2[r]; 6177 e2 = rowEnd2[r]; 6178 while (b1 < e1 && b2 < e2) { 6179 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6180 j[t] = j1[b1]; 6181 imap1[t1] = t; 6182 imap2[t2] = t; 6183 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6184 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6185 t1++; 6186 t2++; 6187 t++; 6188 } else if (j1[b1] < j2[b2]) { 6189 j[t] = j1[b1]; 6190 imap1[t1] = t; 6191 b1 += jmap1[t1 + 1] - jmap1[t1]; 6192 t1++; 6193 t++; 6194 } else { 6195 j[t] = j2[b2]; 6196 imap2[t2] = t; 6197 b2 += jmap2[t2 + 1] - jmap2[t2]; 6198 t2++; 6199 t++; 6200 } 6201 } 6202 /* Merge the remaining in either j1[] or j2[] */ 6203 while (b1 < e1) { 6204 j[t] = j1[b1]; 6205 imap1[t1] = t; 6206 b1 += jmap1[t1 + 1] - jmap1[t1]; 6207 t1++; 6208 t++; 6209 } 6210 while (b2 < e2) { 6211 j[t] = j2[b2]; 6212 imap2[t2] = t; 6213 b2 += jmap2[t2 + 1] - jmap2[t2]; 6214 t2++; 6215 t++; 6216 } 6217 PetscCall(PetscIntCast(t, i + r + 1)); 6218 } 6219 PetscFunctionReturn(PETSC_SUCCESS); 6220 } 6221 6222 /* 6223 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6224 6225 Input Parameters: 6226 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6227 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6228 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6229 6230 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6231 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6232 6233 Output Parameters: 6234 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6235 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6236 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6237 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6238 6239 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6240 Atot: number of entries belonging to the diagonal block. 6241 Annz: number of unique nonzeros belonging to the diagonal block. 6242 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6243 repeats (i.e., same 'i,j' pair). 6244 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6245 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6246 6247 Atot: number of entries belonging to the diagonal block 6248 Annz: number of unique nonzeros belonging to the diagonal block. 6249 6250 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6251 6252 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6253 */ 6254 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6255 { 6256 PetscInt cstart, cend, rstart, rend, row, col; 6257 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6258 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6259 PetscCount k, m, p, q, r, s, mid; 6260 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6261 6262 PetscFunctionBegin; 6263 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6264 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6265 m = rend - rstart; 6266 6267 /* Skip negative rows */ 6268 for (k = 0; k < n; k++) 6269 if (i[k] >= 0) break; 6270 6271 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6272 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6273 */ 6274 while (k < n) { 6275 row = i[k]; 6276 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6277 for (s = k; s < n; s++) 6278 if (i[s] != row) break; 6279 6280 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6281 for (p = k; p < s; p++) { 6282 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6283 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6284 } 6285 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6286 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6287 rowBegin[row - rstart] = k; 6288 rowMid[row - rstart] = mid; 6289 rowEnd[row - rstart] = s; 6290 6291 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6292 Atot += mid - k; 6293 Btot += s - mid; 6294 6295 /* Count unique nonzeros of this diag row */ 6296 for (p = k; p < mid;) { 6297 col = j[p]; 6298 do { 6299 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6300 p++; 6301 } while (p < mid && j[p] == col); 6302 Annz++; 6303 } 6304 6305 /* Count unique nonzeros of this offdiag row */ 6306 for (p = mid; p < s;) { 6307 col = j[p]; 6308 do { 6309 p++; 6310 } while (p < s && j[p] == col); 6311 Bnnz++; 6312 } 6313 k = s; 6314 } 6315 6316 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6317 PetscCall(PetscMalloc1(Atot, &Aperm)); 6318 PetscCall(PetscMalloc1(Btot, &Bperm)); 6319 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6320 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6321 6322 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6323 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6324 for (r = 0; r < m; r++) { 6325 k = rowBegin[r]; 6326 mid 
= rowMid[r]; 6327 s = rowEnd[r]; 6328 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6329 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6330 Atot += mid - k; 6331 Btot += s - mid; 6332 6333 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6334 for (p = k; p < mid;) { 6335 col = j[p]; 6336 q = p; 6337 do { 6338 p++; 6339 } while (p < mid && j[p] == col); 6340 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6341 Annz++; 6342 } 6343 6344 for (p = mid; p < s;) { 6345 col = j[p]; 6346 q = p; 6347 do { 6348 p++; 6349 } while (p < s && j[p] == col); 6350 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6351 Bnnz++; 6352 } 6353 } 6354 /* Output */ 6355 *Aperm_ = Aperm; 6356 *Annz_ = Annz; 6357 *Atot_ = Atot; 6358 *Ajmap_ = Ajmap; 6359 *Bperm_ = Bperm; 6360 *Bnnz_ = Bnnz; 6361 *Btot_ = Btot; 6362 *Bjmap_ = Bjmap; 6363 PetscFunctionReturn(PETSC_SUCCESS); 6364 } 6365 6366 /* 6367 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6368 6369 Input Parameters: 6370 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6371 nnz: number of unique nonzeros in the merged matrix 6372 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6373 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6374 6375 Output Parameter: (memory is allocated by the caller) 6376 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6377 6378 Example: 6379 nnz1 = 4 6380 nnz = 6 6381 imap = [1,3,4,5] 6382 jmap = [0,3,5,6,7] 6383 then, 6384 jmap_new = [0,0,3,3,5,6,7] 6385 */ 6386 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6387 { 6388 PetscCount k, p; 6389 6390 PetscFunctionBegin; 6391 jmap_new[0] = 0; 6392 p = nnz; /* p loops over jmap_new[] backwards */ 6393 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6394 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6395 } 6396 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6397 PetscFunctionReturn(PETSC_SUCCESS); 6398 } 6399 6400 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6401 { 6402 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6403 6404 PetscFunctionBegin; 6405 PetscCall(PetscSFDestroy(&coo->sf)); 6406 PetscCall(PetscFree(coo->Aperm1)); 6407 PetscCall(PetscFree(coo->Bperm1)); 6408 PetscCall(PetscFree(coo->Ajmap1)); 6409 PetscCall(PetscFree(coo->Bjmap1)); 6410 PetscCall(PetscFree(coo->Aimap2)); 6411 PetscCall(PetscFree(coo->Bimap2)); 6412 PetscCall(PetscFree(coo->Aperm2)); 6413 PetscCall(PetscFree(coo->Bperm2)); 6414 PetscCall(PetscFree(coo->Ajmap2)); 6415 PetscCall(PetscFree(coo->Bjmap2)); 6416 PetscCall(PetscFree(coo->Cperm1)); 6417 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6418 PetscCall(PetscFree(coo)); 6419 PetscFunctionReturn(PETSC_SUCCESS); 6420 } 6421 6422 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6423 { 6424 MPI_Comm comm; 6425 PetscMPIInt rank, size; 6426 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6427 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6428 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6429 PetscContainer container; 6430 MatCOOStruct_MPIAIJ 
*coo; 6431 6432 PetscFunctionBegin; 6433 PetscCall(PetscFree(mpiaij->garray)); 6434 PetscCall(VecDestroy(&mpiaij->lvec)); 6435 #if defined(PETSC_USE_CTABLE) 6436 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6437 #else 6438 PetscCall(PetscFree(mpiaij->colmap)); 6439 #endif 6440 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6441 mat->assembled = PETSC_FALSE; 6442 mat->was_assembled = PETSC_FALSE; 6443 6444 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6445 PetscCallMPI(MPI_Comm_size(comm, &size)); 6446 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6447 PetscCall(PetscLayoutSetUp(mat->rmap)); 6448 PetscCall(PetscLayoutSetUp(mat->cmap)); 6449 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6450 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6451 PetscCall(MatGetLocalSize(mat, &m, &n)); 6452 PetscCall(MatGetSize(mat, &M, &N)); 6453 6454 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6455 /* entries come first, then local rows, then remote rows. */ 6456 PetscCount n1 = coo_n, *perm1; 6457 PetscInt *i1 = coo_i, *j1 = coo_j; 6458 6459 PetscCall(PetscMalloc1(n1, &perm1)); 6460 for (k = 0; k < n1; k++) perm1[k] = k; 6461 6462 /* Manipulate indices so that entries with negative row or col indices will have smallest 6463 row indices, local entries will have greater but negative row indices, and remote entries 6464 will have positive row indices. 6465 */ 6466 for (k = 0; k < n1; k++) { 6467 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6468 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6469 else { 6470 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6471 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6472 } 6473 } 6474 6475 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6476 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6477 6478 /* Advance k to the first entry we need to take care of */ 6479 for (k = 0; k < n1; k++) 6480 if (i1[k] > PETSC_INT_MIN) break; 6481 PetscCount i1start = k; 6482 6483 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6484 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6485 6486 /* Send remote rows to their owner */ 6487 /* Find which rows should be sent to which remote ranks*/ 6488 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6489 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6490 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6491 const PetscInt *ranges; 6492 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */ 6493 6494 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6495 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6496 for (k = rem; k < n1;) { 6497 PetscMPIInt owner; 6498 PetscInt firstRow, lastRow; 6499 6500 /* Locate a row range */ 6501 firstRow = i1[k]; /* first row of this owner */ 6502 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6503 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6504 6505 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6506 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6507 6508 /* All entries in [k,p) belong to this remote owner */ 6509 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6510 PetscMPIInt *sendto2; 6511 PetscInt *nentries2; 6512 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6513 6514 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6515 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6516 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6517 PetscCall(PetscFree2(sendto, nentries)); 6518 sendto = sendto2; 6519 nentries = nentries2; 6520 maxNsend = maxNsend2; 6521 } 6522 sendto[nsend] = owner; 6523 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6524 nsend++; 6525 k = p; 6526 } 6527 6528 /* Build 1st SF to know offsets on remote to send data */ 6529 PetscSF sf1; 6530 PetscInt nroots = 1, nroots2 = 0; 6531 PetscInt nleaves = nsend, nleaves2 = 0; 6532 PetscInt *offsets; 6533 PetscSFNode *iremote; 6534 6535 PetscCall(PetscSFCreate(comm, &sf1)); 6536 PetscCall(PetscMalloc1(nsend, &iremote)); 6537 PetscCall(PetscMalloc1(nsend, &offsets)); 6538 for (k = 0; k < nsend; k++) { 6539 iremote[k].rank = sendto[k]; 6540 iremote[k].index = 0; 6541 nleaves2 += nentries[k]; 6542 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6543 } 6544 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6545 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6546 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */ 6547 PetscCall(PetscSFDestroy(&sf1)); 6548 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6549 6550 /* Build 2nd SF to send remote COOs to their owner */ 6551 PetscSF sf2; 6552 nroots = nroots2; 6553 nleaves = nleaves2; 6554 PetscCall(PetscSFCreate(comm, &sf2)); 6555 PetscCall(PetscSFSetFromOptions(sf2)); 6556 PetscCall(PetscMalloc1(nleaves, &iremote)); 6557 p = 0; 6558 for (k = 0; k < nsend; k++) { 6559 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6560 for (q = 0; q < nentries[k]; q++, p++) { 6561 iremote[p].rank = sendto[k]; 6562 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6563 } 6564 } 6565 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6566 6567 /* Send the remote COOs to their owner */ 6568 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6569
PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6570 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6571 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6572 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6573 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6574 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6575 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6576 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6577 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6578 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6579 6580 PetscCall(PetscFree(offsets)); 6581 PetscCall(PetscFree2(sendto, nentries)); 6582 6583 /* Sort received COOs by row along with the permutation array */ 6584 for (k = 0; k < n2; k++) perm2[k] = k; 6585 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6586 6587 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6588 PetscCount *Cperm1; 6589 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6590 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6591 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6592 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6593 6594 /* Support for HYPRE matrices, kind of a hack. 6595 Swap min column with diagonal so that diagonal values will go first */ 6596 PetscBool hypre; 6597 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6598 if (hypre) { 6599 PetscInt *minj; 6600 PetscBT hasdiag; 6601 6602 PetscCall(PetscBTCreate(m, &hasdiag)); 6603 PetscCall(PetscMalloc1(m, &minj)); 6604 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6605 for (k = i1start; k < rem; k++) { 6606 if (j1[k] < cstart || j1[k] >= cend) continue; 6607 const PetscInt rindex = i1[k] - rstart; 6608 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6609 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6610 } 6611 for (k = 0; k < n2; k++) { 6612 if (j2[k] < cstart || j2[k] >= cend) continue; 6613 const PetscInt rindex = i2[k] - rstart; 6614 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6615 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6616 } 6617 for (k = i1start; k < rem; k++) { 6618 const PetscInt rindex = i1[k] - rstart; 6619 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6620 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6621 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6622 } 6623 for (k = 0; k < n2; k++) { 6624 const PetscInt rindex = i2[k] - rstart; 6625 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6626 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6627 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6628 } 6629 PetscCall(PetscBTDestroy(&hasdiag)); 6630 PetscCall(PetscFree(minj)); 6631 } 6632 6633 /* Split local COOs and received COOs into diag/offdiag portions */ 6634 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6635 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6636 PetscCount 
Annz1, Bnnz1, Atot1, Btot1; 6637 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6638 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6639 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6640 6641 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6642 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6643 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6644 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6645 6646 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6647 PetscInt *Ai, *Bi; 6648 PetscInt *Aj, *Bj; 6649 6650 PetscCall(PetscMalloc1(m + 1, &Ai)); 6651 PetscCall(PetscMalloc1(m + 1, &Bi)); 6652 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6653 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6654 6655 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6656 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6657 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6658 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6659 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6660 6661 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6662 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6663 6664 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6665 /* expect nonzeros in A/B most likely have local contributing entries */ 6666 PetscInt Annz = Ai[m]; 6667 PetscInt Bnnz = Bi[m]; 6668 PetscCount *Ajmap1_new, *Bjmap1_new; 6669 6670 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6671 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6672 6673 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6674 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6675 6676 PetscCall(PetscFree(Aimap1)); 6677 PetscCall(PetscFree(Ajmap1)); 6678 PetscCall(PetscFree(Bimap1)); 6679 PetscCall(PetscFree(Bjmap1)); 6680 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6681 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6682 PetscCall(PetscFree(perm1)); 6683 PetscCall(PetscFree3(i2, j2, perm2)); 6684 6685 Ajmap1 = Ajmap1_new; 6686 Bjmap1 = Bjmap1_new; 6687 6688 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6689 if (Annz < Annz1 + Annz2) { 6690 PetscInt *Aj_new; 6691 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6692 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6693 PetscCall(PetscFree(Aj)); 6694 Aj = Aj_new; 6695 } 6696 6697 if (Bnnz < Bnnz1 + Bnnz2) { 6698 PetscInt *Bj_new; 6699 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6700 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6701 PetscCall(PetscFree(Bj)); 6702 Bj = Bj_new; 6703 } 6704 6705 /* Create new submatrices for on-process and off-process coupling */ 6706 PetscScalar *Aa, *Ba; 6707 MatType rtype; 6708 Mat_SeqAIJ *a, *b; 6709 PetscObjectState state; 6710 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6711 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6712 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6713 if (cstart) { 6714 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6715 } 6716 6717 PetscCall(MatGetRootType_Private(mat, &rtype)); 6718 6719 
MatSeqXAIJGetOptions_Private(mpiaij->A); 6720 PetscCall(MatDestroy(&mpiaij->A)); 6721 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6722 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6723 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6724 6725 MatSeqXAIJGetOptions_Private(mpiaij->B); 6726 PetscCall(MatDestroy(&mpiaij->B)); 6727 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6728 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6729 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6730 6731 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6732 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6733 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6734 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6735 6736 a = (Mat_SeqAIJ *)mpiaij->A->data; 6737 b = (Mat_SeqAIJ *)mpiaij->B->data; 6738 a->free_a = PETSC_TRUE; 6739 a->free_ij = PETSC_TRUE; 6740 b->free_a = PETSC_TRUE; 6741 b->free_ij = PETSC_TRUE; 6742 a->maxnz = a->nz; 6743 b->maxnz = b->nz; 6744 6745 /* conversion must happen AFTER multiply setup */ 6746 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6747 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6748 PetscCall(VecDestroy(&mpiaij->lvec)); 6749 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6750 6751 // Put the COO struct in a container and then attach that to the matrix 6752 PetscCall(PetscMalloc1(1, &coo)); 6753 coo->n = coo_n; 6754 coo->sf = sf2; 6755 coo->sendlen = nleaves; 6756 coo->recvlen = nroots; 6757 coo->Annz = Annz; 6758 coo->Bnnz = Bnnz; 6759 coo->Annz2 = Annz2; 6760 coo->Bnnz2 = Bnnz2; 6761 coo->Atot1 = Atot1; 6762 coo->Atot2 = Atot2; 6763 coo->Btot1 = Btot1; 6764 coo->Btot2 = Btot2; 6765 coo->Ajmap1 = Ajmap1; 6766 coo->Aperm1 = Aperm1; 6767 coo->Bjmap1 = Bjmap1; 6768 coo->Bperm1 = Bperm1; 6769 coo->Aimap2 = Aimap2; 6770 coo->Ajmap2 = Ajmap2; 6771 coo->Aperm2 = Aperm2; 6772 coo->Bimap2 = Bimap2; 6773 coo->Bjmap2 = Bjmap2; 6774 coo->Bperm2 = Bperm2; 6775 coo->Cperm1 = Cperm1; 6776 // Allocate in preallocation. 
If not used, it has zero cost on host 6777 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6778 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6779 PetscCall(PetscContainerSetPointer(container, coo)); 6780 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6781 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6782 PetscCall(PetscContainerDestroy(&container)); 6783 PetscFunctionReturn(PETSC_SUCCESS); 6784 } 6785 6786 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6787 { 6788 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6789 Mat A = mpiaij->A, B = mpiaij->B; 6790 PetscScalar *Aa, *Ba; 6791 PetscScalar *sendbuf, *recvbuf; 6792 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6793 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6794 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6795 const PetscCount *Cperm1; 6796 PetscContainer container; 6797 MatCOOStruct_MPIAIJ *coo; 6798 6799 PetscFunctionBegin; 6800 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6801 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6802 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6803 sendbuf = coo->sendbuf; 6804 recvbuf = coo->recvbuf; 6805 Ajmap1 = coo->Ajmap1; 6806 Ajmap2 = coo->Ajmap2; 6807 Aimap2 = coo->Aimap2; 6808 Bjmap1 = coo->Bjmap1; 6809 Bjmap2 = coo->Bjmap2; 6810 Bimap2 = coo->Bimap2; 6811 Aperm1 = coo->Aperm1; 6812 Aperm2 = coo->Aperm2; 6813 Bperm1 = coo->Bperm1; 6814 Bperm2 = coo->Bperm2; 6815 Cperm1 = coo->Cperm1; 6816 6817 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6818 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6819 6820 /* Pack entries to be sent to remote */ 6821 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6822 6823 /* Send remote entries to their owner and overlap the communication with local computation */ 6824 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6825 /* Add local entries to A and B */ 6826 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6827 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6828 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6829 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6830 } 6831 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6832 PetscScalar sum = 0.0; 6833 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6834 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6835 } 6836 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6837 6838 /* Add received remote entries to A and B */ 6839 for (PetscCount i = 0; i < coo->Annz2; i++) { 6840 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6841 } 6842 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6843 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6844 } 6845 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6846 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6847 PetscFunctionReturn(PETSC_SUCCESS); 6848 } 6849 6850 /*MC 6851 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6852 6853 Options Database Keys: 6854 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6855 6856 Level: beginner 6857 6858 Notes: 6859 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6860 in this case the values associated with the rows and columns one passes in are set to zero 6861 in the matrix 6862 6863 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6864 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6865 6866 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6867 M*/ 6868 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6869 { 6870 Mat_MPIAIJ *b; 6871 PetscMPIInt size; 6872 6873 PetscFunctionBegin; 6874 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6875 6876 PetscCall(PetscNew(&b)); 6877 B->data = (void *)b; 6878 B->ops[0] = MatOps_Values; 6879 B->assembled = PETSC_FALSE; 6880 B->insertmode = NOT_SET_VALUES; 6881 b->size = size; 6882 6883 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6884 6885 /* build cache for off array entries formed */ 6886 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6887 6888 b->donotstash = PETSC_FALSE; 6889 b->colmap = NULL; 6890 b->garray = NULL; 6891 b->roworiented = PETSC_TRUE; 6892 6893 /* stuff used for matrix vector multiply */ 6894 b->lvec = NULL; 6895 b->Mvctx = NULL; 6896 6897 /* stuff for MatGetRow() */ 6898 b->rowindices = NULL; 6899 b->rowvalues = NULL; 6900 b->getrowactive = PETSC_FALSE; 6901 6902 /* flexible pointer used in CUSPARSE classes */ 6903 b->spptr = NULL; 6904 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6913 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6914 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6916 #if defined(PETSC_HAVE_CUDA) 6917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6918 #endif 6919 #if defined(PETSC_HAVE_HIP) 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6921 #endif 6922 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6924 #endif 6925 #if defined(PETSC_HAVE_MKL_SPARSE) 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6927 #endif 6928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6932 #if defined(PETSC_HAVE_ELEMENTAL) 6933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6934 #endif 6935 #if defined(PETSC_HAVE_SCALAPACK) 6936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6937 #endif 6938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6940 #if defined(PETSC_HAVE_HYPRE) 6941 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6942 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6943 #endif 6944 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6945 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6946 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6947 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6948 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6949 PetscFunctionReturn(PETSC_SUCCESS); 6950 } 6951 6952 /*@ 6953 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6954 and "off-diagonal" part of the matrix in CSR format. 6955 6956 Collective 6957 6958 Input Parameters: 6959 + comm - MPI communicator 6960 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6961 . n - This value should be the same as the local size used in creating the 6962 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6963 calculated if `N` is given) For square matrices `n` is almost always `m`. 6964 . 
M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6965 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6966 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6967 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6968 . a - matrix values 6969 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6970 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6971 - oa - matrix values 6972 6973 Output Parameter: 6974 . mat - the matrix 6975 6976 Level: advanced 6977 6978 Notes: 6979 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6980 must free the arrays once the matrix has been destroyed and not before. 6981 6982 The `i` and `j` indices are 0 based 6983 6984 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6985 6986 This sets local rows and cannot be used to set off-processor values. 6987 6988 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6989 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6990 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6991 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6992 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6993 communication if it is known that only local entries will be set. 
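
   Example usage (an illustrative sketch, not from the original manual page; the sizes and values are made up, showing rank 0 of a two-process communicator assembling a 4x4 matrix in which each rank owns 2 rows and 2 columns):
.vb
   PetscInt    i[]  = {0, 2, 3};       /* CSR row pointers of the diagonal block           */
   PetscInt    j[]  = {0, 1, 1};       /* LOCAL column indices of the diagonal block       */
   PetscScalar a[]  = {1.0, 2.0, 3.0};
   PetscInt    oi[] = {0, 1, 1};       /* CSR row pointers of the off-diagonal block       */
   PetscInt    oj[] = {2};             /* GLOBAL column indices of the off-diagonal block  */
   PetscScalar oa[] = {4.0};
   Mat         A;

   PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, 4, 4, i, j, a, oi, oj, oa, &A));
   /* the arrays must remain valid until A has been destroyed */
.ve
   Each rank supplies the arrays describing its own local rows; rank 1 would pass analogous arrays for global rows 2 and 3.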
6994 6995 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6996 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6997 @*/ 6998 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6999 { 7000 Mat_MPIAIJ *maij; 7001 7002 PetscFunctionBegin; 7003 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 7004 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 7005 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 7006 PetscCall(MatCreate(comm, mat)); 7007 PetscCall(MatSetSizes(*mat, m, n, M, N)); 7008 PetscCall(MatSetType(*mat, MATMPIAIJ)); 7009 maij = (Mat_MPIAIJ *)(*mat)->data; 7010 7011 (*mat)->preallocated = PETSC_TRUE; 7012 7013 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 7014 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 7015 7016 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 7017 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7018 7019 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7020 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7021 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7022 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7023 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7024 PetscFunctionReturn(PETSC_SUCCESS); 7025 } 7026 7027 typedef struct { 7028 Mat *mp; /* intermediate products */ 7029 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7030 PetscInt cp; /* number of intermediate products */ 7031 7032 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7033 PetscInt *startsj_s, *startsj_r; 7034 PetscScalar *bufa; 7035 Mat P_oth; 7036 7037 /* may take advantage of merging product->B */ 7038 Mat Bloc; /* B-local by merging diag and off-diag */ 7039 7040 /* cusparse does not have support to split between symbolic and numeric phases. 7041 When api_user is true, we don't need to update the numerical values 7042 of the temporary storage */ 7043 PetscBool reusesym; 7044 7045 /* support for COO values insertion */ 7046 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7047 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7048 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7049 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7050 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7051 PetscMemType mtype; 7052 7053 /* customization */ 7054 PetscBool abmerge; 7055 PetscBool P_oth_bind; 7056 } MatMatMPIAIJBACKEND; 7057 7058 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7059 { 7060 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7061 PetscInt i; 7062 7063 PetscFunctionBegin; 7064 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7065 PetscCall(PetscFree(mmdata->bufa)); 7066 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7067 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7068 PetscCall(MatDestroy(&mmdata->P_oth)); 7069 PetscCall(MatDestroy(&mmdata->Bloc)); 7070 PetscCall(PetscSFDestroy(&mmdata->sf)); 7071 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7072 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7073 PetscCall(PetscFree(mmdata->own[0])); 7074 PetscCall(PetscFree(mmdata->own)); 7075 PetscCall(PetscFree(mmdata->off[0])); 7076 PetscCall(PetscFree(mmdata->off)); 7077 PetscCall(PetscFree(mmdata)); 7078 PetscFunctionReturn(PETSC_SUCCESS); 7079 } 7080 7081 /* Copy selected n entries with indices in idx[] of A to v[]. 7082 If idx is NULL, copy the whole data array of A to v[] 7083 */ 7084 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7085 { 7086 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7087 7088 PetscFunctionBegin; 7089 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7090 if (f) { 7091 PetscCall((*f)(A, n, idx, v)); 7092 } else { 7093 const PetscScalar *vv; 7094 7095 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7096 if (n && idx) { 7097 PetscScalar *w = v; 7098 const PetscInt *oi = idx; 7099 PetscInt j; 7100 7101 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7102 } else { 7103 PetscCall(PetscArraycpy(v, vv, n)); 7104 } 7105 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7106 } 7107 PetscFunctionReturn(PETSC_SUCCESS); 7108 } 7109 7110 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7111 { 7112 MatMatMPIAIJBACKEND *mmdata; 7113 PetscInt i, n_d, n_o; 7114 7115 PetscFunctionBegin; 7116 MatCheckProduct(C, 1); 7117 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7118 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7119 if (!mmdata->reusesym) { /* update temporary matrices */ 7120 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7121 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7122 } 7123 mmdata->reusesym = PETSC_FALSE; 7124 7125 for (i = 0; i < mmdata->cp; i++) { 7126 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7127 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7128 } 7129 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7130 PetscInt noff; 7131 7132 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7133 if (mmdata->mptmp[i]) continue; 7134 if (noff) { 7135 PetscInt nown; 7136 7137 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7138 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], 
mmdata->coo_w + n_o)); 7139 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7140 n_o += noff; 7141 n_d += nown; 7142 } else { 7143 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7144 7145 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7146 n_d += mm->nz; 7147 } 7148 } 7149 if (mmdata->hasoffproc) { /* offprocess insertion */ 7150 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7151 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7152 } 7153 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7154 PetscFunctionReturn(PETSC_SUCCESS); 7155 } 7156 7157 /* Support for Pt * A, A * P, or Pt * A * P */ 7158 #define MAX_NUMBER_INTERMEDIATE 4 7159 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7160 { 7161 Mat_Product *product = C->product; 7162 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7163 Mat_MPIAIJ *a, *p; 7164 MatMatMPIAIJBACKEND *mmdata; 7165 ISLocalToGlobalMapping P_oth_l2g = NULL; 7166 IS glob = NULL; 7167 const char *prefix; 7168 char pprefix[256]; 7169 const PetscInt *globidx, *P_oth_idx; 7170 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7171 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7172 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7173 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7174 /* a base offset; type-2: sparse with a local to global map table */ 7175 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7176 7177 MatProductType ptype; 7178 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7179 PetscMPIInt size; 7180 7181 PetscFunctionBegin; 7182 MatCheckProduct(C, 1); 7183 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7184 ptype = product->type; 7185 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7186 ptype = MATPRODUCT_AB; 7187 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7188 } 7189 switch (ptype) { 7190 case MATPRODUCT_AB: 7191 A = product->A; 7192 P = product->B; 7193 m = A->rmap->n; 7194 n = P->cmap->n; 7195 M = A->rmap->N; 7196 N = P->cmap->N; 7197 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7198 break; 7199 case MATPRODUCT_AtB: 7200 P = product->A; 7201 A = product->B; 7202 m = P->cmap->n; 7203 n = A->cmap->n; 7204 M = P->cmap->N; 7205 N = A->cmap->N; 7206 hasoffproc = PETSC_TRUE; 7207 break; 7208 case MATPRODUCT_PtAP: 7209 A = product->A; 7210 P = product->B; 7211 m = P->cmap->n; 7212 n = P->cmap->n; 7213 M = P->cmap->N; 7214 N = P->cmap->N; 7215 hasoffproc = PETSC_TRUE; 7216 break; 7217 default: 7218 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7219 } 7220 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7221 if (size == 1) hasoffproc = PETSC_FALSE; 7222 7223 /* defaults */ 7224 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7225 mp[i] = NULL; 7226 mptmp[i] = PETSC_FALSE; 7227 rmapt[i] = -1; 7228 cmapt[i] = -1; 7229 rmapa[i] = NULL; 7230 cmapa[i] = NULL; 7231 } 7232 7233 /* customization */ 7234 PetscCall(PetscNew(&mmdata)); 7235 mmdata->reusesym = product->api_user; 7236 if (ptype == 
MATPRODUCT_AB) { 7237 if (product->api_user) { 7238 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7239 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7240 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7241 PetscOptionsEnd(); 7242 } else { 7243 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7244 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7245 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7246 PetscOptionsEnd(); 7247 } 7248 } else if (ptype == MATPRODUCT_PtAP) { 7249 if (product->api_user) { 7250 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7251 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7252 PetscOptionsEnd(); 7253 } else { 7254 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7255 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7256 PetscOptionsEnd(); 7257 } 7258 } 7259 a = (Mat_MPIAIJ *)A->data; 7260 p = (Mat_MPIAIJ *)P->data; 7261 PetscCall(MatSetSizes(C, m, n, M, N)); 7262 PetscCall(PetscLayoutSetUp(C->rmap)); 7263 PetscCall(PetscLayoutSetUp(C->cmap)); 7264 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7265 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7266 7267 cp = 0; 7268 switch (ptype) { 7269 case MATPRODUCT_AB: /* A * P */ 7270 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7271 7272 /* A_diag * P_local (merged or not) */ 7273 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7274 /* P is product->B */ 7275 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7276 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7277 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7278 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7279 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7280 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7281 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7282 mp[cp]->product->api_user = product->api_user; 7283 PetscCall(MatProductSetFromOptions(mp[cp])); 7284 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7285 PetscCall(ISGetIndices(glob, &globidx)); 7286 rmapt[cp] = 1; 7287 cmapt[cp] = 2; 7288 cmapa[cp] = globidx; 7289 mptmp[cp] = PETSC_FALSE; 7290 cp++; 7291 } else { /* A_diag * P_diag and A_diag * P_off */ 7292 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7293 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7294 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7295 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7296 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7297 PetscCall(MatAppendOptionsPrefix(mp[cp], 
pprefix)); 7298 mp[cp]->product->api_user = product->api_user; 7299 PetscCall(MatProductSetFromOptions(mp[cp])); 7300 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7301 rmapt[cp] = 1; 7302 cmapt[cp] = 1; 7303 mptmp[cp] = PETSC_FALSE; 7304 cp++; 7305 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7306 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7307 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7308 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7309 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7310 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7311 mp[cp]->product->api_user = product->api_user; 7312 PetscCall(MatProductSetFromOptions(mp[cp])); 7313 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7314 rmapt[cp] = 1; 7315 cmapt[cp] = 2; 7316 cmapa[cp] = p->garray; 7317 mptmp[cp] = PETSC_FALSE; 7318 cp++; 7319 } 7320 7321 /* A_off * P_other */ 7322 if (mmdata->P_oth) { 7323 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7324 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7325 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7326 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7327 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7328 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7329 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7330 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7331 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7332 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7333 mp[cp]->product->api_user = product->api_user; 7334 PetscCall(MatProductSetFromOptions(mp[cp])); 7335 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7336 rmapt[cp] = 1; 7337 cmapt[cp] = 2; 7338 cmapa[cp] = P_oth_idx; 7339 mptmp[cp] = PETSC_FALSE; 7340 cp++; 7341 } 7342 break; 7343 7344 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7345 /* A is product->B */ 7346 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7347 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7348 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7349 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7350 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7351 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7352 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7353 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7354 mp[cp]->product->api_user = product->api_user; 7355 PetscCall(MatProductSetFromOptions(mp[cp])); 7356 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7357 PetscCall(ISGetIndices(glob, &globidx)); 7358 rmapt[cp] = 2; 7359 rmapa[cp] = globidx; 7360 cmapt[cp] = 2; 7361 cmapa[cp] = globidx; 7362 mptmp[cp] = PETSC_FALSE; 7363 cp++; 7364 } else { 7365 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7366 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7367 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7368 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7369 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7370 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7371 mp[cp]->product->api_user = product->api_user; 7372 PetscCall(MatProductSetFromOptions(mp[cp])); 7373 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 
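      /* The product just built is P_diag^T * A_loc: its rows are the locally owned rows of C (consecutive, type-1 row map),
         while its columns follow the merged local-to-global map of A_loc obtained above (type-2 column map), as recorded next */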
7374 PetscCall(ISGetIndices(glob, &globidx)); 7375 rmapt[cp] = 1; 7376 cmapt[cp] = 2; 7377 cmapa[cp] = globidx; 7378 mptmp[cp] = PETSC_FALSE; 7379 cp++; 7380 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7381 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7382 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7383 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7384 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7385 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7386 mp[cp]->product->api_user = product->api_user; 7387 PetscCall(MatProductSetFromOptions(mp[cp])); 7388 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7389 rmapt[cp] = 2; 7390 rmapa[cp] = p->garray; 7391 cmapt[cp] = 2; 7392 cmapa[cp] = globidx; 7393 mptmp[cp] = PETSC_FALSE; 7394 cp++; 7395 } 7396 break; 7397 case MATPRODUCT_PtAP: 7398 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7399 /* P is product->B */ 7400 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7401 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7402 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7403 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7404 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7405 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7406 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7407 mp[cp]->product->api_user = product->api_user; 7408 PetscCall(MatProductSetFromOptions(mp[cp])); 7409 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7410 PetscCall(ISGetIndices(glob, &globidx)); 7411 rmapt[cp] = 2; 7412 rmapa[cp] = globidx; 7413 cmapt[cp] = 2; 7414 cmapa[cp] = globidx; 7415 mptmp[cp] = PETSC_FALSE; 7416 cp++; 7417 if (mmdata->P_oth) { 7418 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7419 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7420 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7421 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7422 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7423 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7424 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7425 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7426 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7427 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7428 mp[cp]->product->api_user = product->api_user; 7429 PetscCall(MatProductSetFromOptions(mp[cp])); 7430 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7431 mptmp[cp] = PETSC_TRUE; 7432 cp++; 7433 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7434 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7435 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7436 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7437 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7438 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7439 mp[cp]->product->api_user = product->api_user; 7440 PetscCall(MatProductSetFromOptions(mp[cp])); 7441 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7442 rmapt[cp] = 2; 7443 rmapa[cp] = globidx; 7444 cmapt[cp] = 2; 7445 cmapa[cp] = P_oth_idx; 7446 mptmp[cp] = PETSC_FALSE; 7447 cp++; 7448 } 7449 break; 7450 default: 7451 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, 
"Not for product type %s", MatProductTypes[ptype]); 7452 } 7453 /* sanity check */ 7454 if (size > 1) 7455 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7456 7457 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7458 for (i = 0; i < cp; i++) { 7459 mmdata->mp[i] = mp[i]; 7460 mmdata->mptmp[i] = mptmp[i]; 7461 } 7462 mmdata->cp = cp; 7463 C->product->data = mmdata; 7464 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7465 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7466 7467 /* memory type */ 7468 mmdata->mtype = PETSC_MEMTYPE_HOST; 7469 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7470 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7471 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7472 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7473 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7474 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7475 7476 /* prepare coo coordinates for values insertion */ 7477 7478 /* count total nonzeros of those intermediate seqaij Mats 7479 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7480 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7481 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7482 */ 7483 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7484 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7485 if (mptmp[cp]) continue; 7486 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7487 const PetscInt *rmap = rmapa[cp]; 7488 const PetscInt mr = mp[cp]->rmap->n; 7489 const PetscInt rs = C->rmap->rstart; 7490 const PetscInt re = C->rmap->rend; 7491 const PetscInt *ii = mm->i; 7492 for (i = 0; i < mr; i++) { 7493 const PetscInt gr = rmap[i]; 7494 const PetscInt nz = ii[i + 1] - ii[i]; 7495 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7496 else ncoo_oown += nz; /* this row is local */ 7497 } 7498 } else ncoo_d += mm->nz; 7499 } 7500 7501 /* 7502 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7503 7504 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7505 7506 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7507 7508 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7509 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7510 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7511 7512 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7513 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 
7514 */ 7515 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7516 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7517 7518 /* gather (i,j) of nonzeros inserted by remote procs */ 7519 if (hasoffproc) { 7520 PetscSF msf; 7521 PetscInt ncoo2, *coo_i2, *coo_j2; 7522 7523 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7524 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7525 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7526 7527 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7528 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7529 PetscInt *idxoff = mmdata->off[cp]; 7530 PetscInt *idxown = mmdata->own[cp]; 7531 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7532 const PetscInt *rmap = rmapa[cp]; 7533 const PetscInt *cmap = cmapa[cp]; 7534 const PetscInt *ii = mm->i; 7535 PetscInt *coi = coo_i + ncoo_o; 7536 PetscInt *coj = coo_j + ncoo_o; 7537 const PetscInt mr = mp[cp]->rmap->n; 7538 const PetscInt rs = C->rmap->rstart; 7539 const PetscInt re = C->rmap->rend; 7540 const PetscInt cs = C->cmap->rstart; 7541 for (i = 0; i < mr; i++) { 7542 const PetscInt *jj = mm->j + ii[i]; 7543 const PetscInt gr = rmap[i]; 7544 const PetscInt nz = ii[i + 1] - ii[i]; 7545 if (gr < rs || gr >= re) { /* this is an offproc row */ 7546 for (j = ii[i]; j < ii[i + 1]; j++) { 7547 *coi++ = gr; 7548 *idxoff++ = j; 7549 } 7550 if (!cmapt[cp]) { /* already global */ 7551 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7552 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7553 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7554 } else { /* offdiag */ 7555 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7556 } 7557 ncoo_o += nz; 7558 } else { /* this is a local row */ 7559 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7560 } 7561 } 7562 } 7563 mmdata->off[cp + 1] = idxoff; 7564 mmdata->own[cp + 1] = idxown; 7565 } 7566 7567 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7568 PetscInt incoo_o; 7569 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7570 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7571 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7572 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7573 ncoo = ncoo_d + ncoo_oown + ncoo2; 7574 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7575 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7576 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7577 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7578 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7579 PetscCall(PetscFree2(coo_i, coo_j)); 7580 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7581 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7582 coo_i = coo_i2; 7583 coo_j = coo_j2; 7584 } else { /* no offproc values insertion */ 7585 ncoo = ncoo_d; 7586 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7587 7588 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7589 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7590 PetscCall(PetscSFSetUp(mmdata->sf)); 7591 } 7592 
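  /* From here on, coo_i[]/coo_j[] have room for all ncoo entries owned by this process: the trailing part (when hasoffproc)
     was just filled with (i,j) pairs gathered from remote ranks, and the leading ncoo_d + ncoo_oown slots are filled by the
     loop below with the pairs inserted locally */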
mmdata->hasoffproc = hasoffproc; 7593 7594 /* gather (i,j) of nonzeros inserted locally */ 7595 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7596 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7597 PetscInt *coi = coo_i + ncoo_d; 7598 PetscInt *coj = coo_j + ncoo_d; 7599 const PetscInt *jj = mm->j; 7600 const PetscInt *ii = mm->i; 7601 const PetscInt *cmap = cmapa[cp]; 7602 const PetscInt *rmap = rmapa[cp]; 7603 const PetscInt mr = mp[cp]->rmap->n; 7604 const PetscInt rs = C->rmap->rstart; 7605 const PetscInt re = C->rmap->rend; 7606 const PetscInt cs = C->cmap->rstart; 7607 7608 if (mptmp[cp]) continue; 7609 if (rmapt[cp] == 1) { /* consecutive rows */ 7610 /* fill coo_i */ 7611 for (i = 0; i < mr; i++) { 7612 const PetscInt gr = i + rs; 7613 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7614 } 7615 /* fill coo_j */ 7616 if (!cmapt[cp]) { /* type-0, already global */ 7617 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7618 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7619 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7620 } else { /* type-2, local to global for sparse columns */ 7621 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7622 } 7623 ncoo_d += mm->nz; 7624 } else if (rmapt[cp] == 2) { /* sparse rows */ 7625 for (i = 0; i < mr; i++) { 7626 const PetscInt *jj = mm->j + ii[i]; 7627 const PetscInt gr = rmap[i]; 7628 const PetscInt nz = ii[i + 1] - ii[i]; 7629 if (gr >= rs && gr < re) { /* local rows */ 7630 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7631 if (!cmapt[cp]) { /* type-0, already global */ 7632 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7633 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7634 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7635 } else { /* type-2, local to global for sparse columns */ 7636 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7637 } 7638 ncoo_d += nz; 7639 } 7640 } 7641 } 7642 } 7643 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7644 PetscCall(ISDestroy(&glob)); 7645 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7646 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7647 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7648 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7649 7650 /* set block sizes */ 7651 A = product->A; 7652 P = product->B; 7653 switch (ptype) { 7654 case MATPRODUCT_PtAP: 7655 if (P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs)); 7656 break; 7657 case MATPRODUCT_RARt: 7658 if (P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs)); 7659 break; 7660 case MATPRODUCT_ABC: 7661 PetscCall(MatSetBlockSizesFromMats(C, A, product->C)); 7662 break; 7663 case MATPRODUCT_AB: 7664 PetscCall(MatSetBlockSizesFromMats(C, A, P)); 7665 break; 7666 case MATPRODUCT_AtB: 7667 if (A->cmap->bs > 1 || P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs)); 7668 break; 7669 case MATPRODUCT_ABt: 7670 if (A->rmap->bs > 1 || P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs)); 7671 break; 7672 default: 7673 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]); 7674 } 7675 7676 /* preallocate with COO data */ 7677 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7678 PetscCall(PetscFree2(coo_i, coo_j)); 7679 PetscFunctionReturn(PETSC_SUCCESS); 7680 
} 7681 7682 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7683 { 7684 Mat_Product *product = mat->product; 7685 #if defined(PETSC_HAVE_DEVICE) 7686 PetscBool match = PETSC_FALSE; 7687 PetscBool usecpu = PETSC_FALSE; 7688 #else 7689 PetscBool match = PETSC_TRUE; 7690 #endif 7691 7692 PetscFunctionBegin; 7693 MatCheckProduct(mat, 1); 7694 #if defined(PETSC_HAVE_DEVICE) 7695 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7696 if (match) { /* we can always fallback to the CPU if requested */ 7697 switch (product->type) { 7698 case MATPRODUCT_AB: 7699 if (product->api_user) { 7700 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7701 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7702 PetscOptionsEnd(); 7703 } else { 7704 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7705 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7706 PetscOptionsEnd(); 7707 } 7708 break; 7709 case MATPRODUCT_AtB: 7710 if (product->api_user) { 7711 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7712 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7713 PetscOptionsEnd(); 7714 } else { 7715 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7716 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7717 PetscOptionsEnd(); 7718 } 7719 break; 7720 case MATPRODUCT_PtAP: 7721 if (product->api_user) { 7722 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7723 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7724 PetscOptionsEnd(); 7725 } else { 7726 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7727 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7728 PetscOptionsEnd(); 7729 } 7730 break; 7731 default: 7732 break; 7733 } 7734 match = (PetscBool)!usecpu; 7735 } 7736 #endif 7737 if (match) { 7738 switch (product->type) { 7739 case MATPRODUCT_AB: 7740 case MATPRODUCT_AtB: 7741 case MATPRODUCT_PtAP: 7742 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7743 break; 7744 default: 7745 break; 7746 } 7747 } 7748 /* fallback to MPIAIJ ops */ 7749 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7750 PetscFunctionReturn(PETSC_SUCCESS); 7751 } 7752 7753 /* 7754 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7755 7756 n - the number of block indices in cc[] 7757 cc - the block indices (must be large enough to contain the indices) 7758 */ 7759 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7760 { 7761 PetscInt cnt = -1, nidx, j; 7762 const PetscInt *idx; 7763 7764 PetscFunctionBegin; 7765 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7766 if (nidx) { 7767 cnt = 0; 7768 cc[cnt] = idx[0] / bs; 7769 
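    /* column indices returned by MatGetRow() are sorted, so equal block indices idx[j]/bs appear consecutively;
       record a new block column only when it exceeds the one stored last */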
for (j = 1; j < nidx; j++) { 7770 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7771 } 7772 } 7773 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7774 *n = cnt + 1; 7775 PetscFunctionReturn(PETSC_SUCCESS); 7776 } 7777 7778 /* 7779 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7780 7781 ncollapsed - the number of block indices 7782 collapsed - the block indices (must be large enough to contain the indices) 7783 */ 7784 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7785 { 7786 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7787 7788 PetscFunctionBegin; 7789 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7790 for (i = start + 1; i < start + bs; i++) { 7791 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7792 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7793 cprevtmp = cprev; 7794 cprev = merged; 7795 merged = cprevtmp; 7796 } 7797 *ncollapsed = nprev; 7798 if (collapsed) *collapsed = cprev; 7799 PetscFunctionReturn(PETSC_SUCCESS); 7800 } 7801 7802 /* 7803 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7804 7805 Input Parameter: 7806 . Amat - matrix 7807 - symmetrize - make the result symmetric 7808 + scale - scale with diagonal 7809 7810 Output Parameter: 7811 . a_Gmat - output scalar graph >= 0 7812 7813 */ 7814 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7815 { 7816 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7817 MPI_Comm comm; 7818 Mat Gmat; 7819 PetscBool ismpiaij, isseqaij; 7820 Mat a, b, c; 7821 MatType jtype; 7822 7823 PetscFunctionBegin; 7824 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7825 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7826 PetscCall(MatGetSize(Amat, &MM, &NN)); 7827 PetscCall(MatGetBlockSize(Amat, &bs)); 7828 nloc = (Iend - Istart) / bs; 7829 7830 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7831 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7832 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7833 7834 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7835 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7836 implementation */ 7837 if (bs > 1) { 7838 PetscCall(MatGetType(Amat, &jtype)); 7839 PetscCall(MatCreate(comm, &Gmat)); 7840 PetscCall(MatSetType(Gmat, jtype)); 7841 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7842 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7843 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7844 PetscInt *d_nnz, *o_nnz; 7845 MatScalar *aa, val, *AA; 7846 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7847 7848 if (isseqaij) { 7849 a = Amat; 7850 b = NULL; 7851 } else { 7852 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7853 a = d->A; 7854 b = d->B; 7855 } 7856 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7857 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7858 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7859 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7860 const PetscInt *cols1, *cols2; 7861 7862 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7863 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7864 nnz[brow / bs] = nc2 / bs; 7865 if (nc2 % bs) ok = 0; 7866 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7867 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7868 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7869 if (nc1 != nc2) ok = 0; 7870 else { 7871 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7872 if (cols1[jj] != cols2[jj]) ok = 0; 7873 if (cols1[jj] % bs != jj % bs) ok = 0; 7874 } 7875 } 7876 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7877 } 7878 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7879 if (!ok) { 7880 PetscCall(PetscFree2(d_nnz, o_nnz)); 7881 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7882 goto old_bs; 7883 } 7884 } 7885 } 7886 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7887 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7888 PetscCall(PetscFree2(d_nnz, o_nnz)); 7889 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7890 // diag 7891 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7892 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7893 7894 ai = aseq->i; 7895 n = ai[brow + 1] - ai[brow]; 7896 aj = aseq->j + ai[brow]; 7897 for (PetscInt k = 0; k < n; k += bs) { // block columns 7898 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7899 val = 0; 7900 if (index_size == 0) { 7901 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7902 aa = aseq->a + ai[brow + ii] + k; 7903 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7904 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7905 } 7906 } 7907 } else { // use (index,index) value if provided 7908 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7909 PetscInt ii = index[iii]; 7910 aa = aseq->a + ai[brow + ii] + k; 7911 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7912 PetscInt jj = index[jjj]; 7913 val += PetscAbs(PetscRealPart(aa[jj])); 7914 } 7915 } 7916 } 7917 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7918 AA[k / bs] = val; 7919 } 7920 grow = Istart / bs + brow / bs; 7921 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7922 } 7923 // off-diag 7924 if (ismpiaij) { 7925 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7926 const PetscScalar *vals; 7927 const PetscInt *cols, *garray = aij->garray; 7928 7929 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7930 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7931 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7932 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7933 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7934 AA[k / bs] = 0; 7935 AJ[cidx] = garray[cols[k]] / bs; 7936 } 7937 nc = ncols / bs; 7938 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7939 if (index_size == 0) { 7940 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7941 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7942 for (PetscInt k = 0; k < ncols; k += bs) { 7943 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7944 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7945 AA[k / bs] 
+= PetscAbs(PetscRealPart(vals[k + jj])); 7946 } 7947 } 7948 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7949 } 7950 } else { // use (index,index) value if provided 7951 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7952 PetscInt ii = index[iii]; 7953 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7954 for (PetscInt k = 0; k < ncols; k += bs) { 7955 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7956 PetscInt jj = index[jjj]; 7957 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7958 } 7959 } 7960 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7961 } 7962 } 7963 grow = Istart / bs + brow / bs; 7964 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7965 } 7966 } 7967 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7968 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7969 PetscCall(PetscFree2(AA, AJ)); 7970 } else { 7971 const PetscScalar *vals; 7972 const PetscInt *idx; 7973 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7974 old_bs: 7975 /* 7976 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7977 */ 7978 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7979 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7980 if (isseqaij) { 7981 PetscInt max_d_nnz; 7982 7983 /* 7984 Determine exact preallocation count for (sequential) scalar matrix 7985 */ 7986 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7987 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7988 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7989 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7990 PetscCall(PetscFree3(w0, w1, w2)); 7991 } else if (ismpiaij) { 7992 Mat Daij, Oaij; 7993 const PetscInt *garray; 7994 PetscInt max_d_nnz; 7995 7996 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7997 /* 7998 Determine exact preallocation count for diagonal block portion of scalar matrix 7999 */ 8000 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 8001 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 8002 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 8003 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 8004 PetscCall(PetscFree3(w0, w1, w2)); 8005 /* 8006 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 8007 */ 8008 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 8009 o_nnz[jj] = 0; 8010 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 8011 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 8012 o_nnz[jj] += ncols; 8013 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 8014 } 8015 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 8016 } 8017 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 8018 /* get scalar copy (norms) of matrix */ 8019 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 8020 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 8021 PetscCall(PetscFree2(d_nnz, o_nnz)); 8022 for (Ii = Istart; Ii < Iend; Ii++) { 8023 PetscInt dest_row = Ii / bs; 8024 8025 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 8026 for (jj = 0; jj < ncols; jj++) { 8027 PetscInt dest_col = idx[jj] / bs; 8028 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 8029 8030 PetscCall(MatSetValues(Gmat, 1, 
&dest_row, 1, &dest_col, &sv, ADD_VALUES)); 8031 } 8032 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 8033 } 8034 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8035 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8036 } 8037 } else { 8038 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8039 else { 8040 Gmat = Amat; 8041 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8042 } 8043 if (isseqaij) { 8044 a = Gmat; 8045 b = NULL; 8046 } else { 8047 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8048 a = d->A; 8049 b = d->B; 8050 } 8051 if (filter >= 0 || scale) { 8052 /* take absolute value of each entry */ 8053 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8054 MatInfo info; 8055 PetscScalar *avals; 8056 8057 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8058 PetscCall(MatSeqAIJGetArray(c, &avals)); 8059 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8060 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8061 } 8062 } 8063 } 8064 if (symmetrize) { 8065 PetscBool isset, issym; 8066 8067 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8068 if (!isset || !issym) { 8069 Mat matTrans; 8070 8071 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8072 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8073 PetscCall(MatDestroy(&matTrans)); 8074 } 8075 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8076 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8077 if (scale) { 8078 /* scale c for all diagonal values = 1 or -1 */ 8079 Vec diag; 8080 8081 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8082 PetscCall(MatGetDiagonal(Gmat, diag)); 8083 PetscCall(VecReciprocal(diag)); 8084 PetscCall(VecSqrtAbs(diag)); 8085 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8086 PetscCall(VecDestroy(&diag)); 8087 } 8088 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8089 if (filter >= 0) { 8090 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8091 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8092 } 8093 *a_Gmat = Gmat; 8094 PetscFunctionReturn(PETSC_SUCCESS); 8095 } 8096 8097 /* 8098 Special version for direct calls from Fortran 8099 */ 8100 8101 /* Change these macros so can be used in void function */ 8102 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8103 #undef PetscCall 8104 #define PetscCall(...) \ 8105 do { \ 8106 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8107 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8108 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8109 return; \ 8110 } \ 8111 } while (0) 8112 8113 #undef SETERRQ 8114 #define SETERRQ(comm, ierr, ...) 
\ 8115 do { \ 8116 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8117 return; \ 8118 } while (0) 8119 8120 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8121 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8122 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8123 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8124 #else 8125 #endif 8126 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8127 { 8128 Mat mat = *mmat; 8129 PetscInt m = *mm, n = *mn; 8130 InsertMode addv = *maddv; 8131 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8132 PetscScalar value; 8133 8134 MatCheckPreallocated(mat, 1); 8135 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8136 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8137 { 8138 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8139 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8140 PetscBool roworiented = aij->roworiented; 8141 8142 /* Some Variables required in the macro */ 8143 Mat A = aij->A; 8144 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8145 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8146 MatScalar *aa; 8147 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8148 Mat B = aij->B; 8149 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8150 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8151 MatScalar *ba; 8152 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8153 * cannot use "#if defined" inside a macro. 
*/ 8154 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8155 8156 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8157 PetscInt nonew = a->nonew; 8158 MatScalar *ap1, *ap2; 8159 8160 PetscFunctionBegin; 8161 PetscCall(MatSeqAIJGetArray(A, &aa)); 8162 PetscCall(MatSeqAIJGetArray(B, &ba)); 8163 for (i = 0; i < m; i++) { 8164 if (im[i] < 0) continue; 8165 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8166 if (im[i] >= rstart && im[i] < rend) { 8167 row = im[i] - rstart; 8168 lastcol1 = -1; 8169 rp1 = aj + ai[row]; 8170 ap1 = aa + ai[row]; 8171 rmax1 = aimax[row]; 8172 nrow1 = ailen[row]; 8173 low1 = 0; 8174 high1 = nrow1; 8175 lastcol2 = -1; 8176 rp2 = bj + bi[row]; 8177 ap2 = ba + bi[row]; 8178 rmax2 = bimax[row]; 8179 nrow2 = bilen[row]; 8180 low2 = 0; 8181 high2 = nrow2; 8182 8183 for (j = 0; j < n; j++) { 8184 if (roworiented) value = v[i * n + j]; 8185 else value = v[i + j * m]; 8186 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8187 if (in[j] >= cstart && in[j] < cend) { 8188 col = in[j] - cstart; 8189 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8190 } else if (in[j] < 0) continue; 8191 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8192 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8193 } else { 8194 if (mat->was_assembled) { 8195 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8196 #if defined(PETSC_USE_CTABLE) 8197 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8198 col--; 8199 #else 8200 col = aij->colmap[in[j]] - 1; 8201 #endif 8202 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8203 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8204 col = in[j]; 8205 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8206 B = aij->B; 8207 b = (Mat_SeqAIJ *)B->data; 8208 bimax = b->imax; 8209 bi = b->i; 8210 bilen = b->ilen; 8211 bj = b->j; 8212 rp2 = bj + bi[row]; 8213 ap2 = ba + bi[row]; 8214 rmax2 = bimax[row]; 8215 nrow2 = bilen[row]; 8216 low2 = 0; 8217 high2 = nrow2; 8218 bm = aij->B->rmap->n; 8219 ba = b->a; 8220 inserted = PETSC_FALSE; 8221 } 8222 } else col = in[j]; 8223 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8224 } 8225 } 8226 } else if (!aij->donotstash) { 8227 if (roworiented) { 8228 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8229 } else { 8230 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8231 } 8232 } 8233 } 8234 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8235 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8236 } 8237 PetscFunctionReturnVoid(); 8238 } 8239 8240 /* Undefining these here since they were redefined from their original definition above! No 8241 * other PETSc functions should be defined past this point, as it is impossible to recover the 8242 * original definitions */ 8243 #undef PetscCall 8244 #undef SETERRQ 8245
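/*
   A minimal sketch (not part of this file's API) of how the backend product path above is typically exercised from
   user code; A and P are assumed to be already assembled MATMPIAIJ (or device AIJ) matrices:

     Mat C;
     PetscCall(MatProductCreate(A, P, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
     PetscCall(MatProductSetFromOptions(C));   // may select MatProductSymbolic_MPIAIJBACKEND for device AIJ types
     PetscCall(MatProductSymbolic(C));         // builds the intermediate products and the COO pattern
     PetscCall(MatProductNumeric(C));          // fills the values via MatSetValuesCOO()
     PetscCall(MatDestroy(&C));
*/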