1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <../src/mat/impls/dense/seq/dense.h> 6 #include <petscdmplex.h> 7 #include <petscblaslapack.h> 8 #include <petsc/private/sfimpl.h> 9 #include <petsc/private/dmpleximpl.h> 10 #include <petscdmda.h> 11 12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 13 14 /* if range is true, it returns B s.t. span{B} = range(A) 15 if range is false, it returns B s.t. range(B) _|_ range(A) */ 16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 17 { 18 PetscScalar *uwork, *data, *U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM, bN, lwork, lierr, di = 1; 21 PetscInt ulw, i, nr, nc, n; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 PetscCall(MatGetSize(A, &nr, &nc)); 28 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 33 PetscCall(PetscMalloc1(ulw, &uwork)); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr, nc); 39 if (!rwork) { 40 PetscCall(PetscMalloc1(n, &sing)); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 PetscCall(PetscMalloc1(nr * nr, &U)); 47 PetscCall(PetscBLASIntCast(nr, &bM)); 48 PetscCall(PetscBLASIntCast(nc, &bN)); 49 PetscCall(PetscBLASIntCast(ulw, &lwork)); 50 PetscCall(MatDenseGetArray(A, &data)); 51 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 54 #else 55 PetscCall(PetscMalloc1(5 * n, &rwork2)); 56 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 57 PetscCall(PetscFree(rwork2)); 58 #endif 59 PetscCall(PetscFPTrapPop()); 60 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 61 PetscCall(MatDenseRestoreArray(A, &data)); 62 for (i = 0; i < n; i++) 63 if (sing[i] < PETSC_SMALL) break; 64 if (!rwork) PetscCall(PetscFree(sing)); 65 if (!work) PetscCall(PetscFree(uwork)); 66 /* create B */ 67 if (!range) { 68 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 69 PetscCall(MatDenseGetArray(*B, &data)); 70 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 71 } else { 72 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 73 PetscCall(MatDenseGetArray(*B, &data)); 74 PetscCall(PetscArraycpy(data, U, i * nr)); 75 } 76 PetscCall(MatDenseRestoreArray(*B, &data)); 77 PetscCall(PetscFree(U)); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 88 { 89 Mat GE, GEd; 90 PetscInt rsize, csize, esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 PetscCall(ISGetSize(edge, &esize)); 95 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 96 PetscCall(ISGetSize(extrow, &rsize)); 97 PetscCall(ISGetSize(extcol, &csize)); 98 99 /* gradients */ 100 ptr = work + 5 * esize; 101 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 102 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 103 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 104 PetscCall(MatDestroy(&GE)); 105 106 /* constants */ 107 ptr += rsize * csize; 108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 109 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 110 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 111 PetscCall(MatDestroy(&GE)); 112 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 113 PetscCall(MatDestroy(&GEd)); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 121 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 122 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 123 /* v = PetscAbsScalar(vals[0]); */ 124 v = 1.; 125 cvals[0] = vals[0] / v; 126 cvals[1] = vals[1] / v; 127 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 128 PetscCall(MatScale(*GKins, 1. / v)); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 134 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 135 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 136 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 137 PetscCall(MatView(GEc, viewer)); 138 PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK")); 139 PetscCall(MatView(*GKins, viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 141 PetscCall(MatView(GEd, viewer)); 142 PetscCall(PetscViewerDestroy(&viewer)); 143 } 144 #endif 145 PetscCall(MatDestroy(&GEd)); 146 PetscCall(MatDestroy(&GEc)); 147 } 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *); 152 153 PetscErrorCode PCBDDCNedelecSupport(PC pc) 154 { 155 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 156 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 157 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 160 MPI_Comm comm; 161 IS lned, primals, allprimals, nedfieldlocal, elements_corners = NULL; 162 IS *eedges, *extrows, *extcols, *alleedges; 163 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 164 PetscScalar *vals, *work; 165 PetscReal *rwork; 166 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 167 PetscInt ne, nv, Lv, order, n, field; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 187 /* Command line customization */ 188 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 189 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 190 /* print debug info and adaptive order TODO: to be removed */ 191 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 192 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 193 PetscOptionsEnd(); 194 195 /* Return if there are no edges in the decomposition */ 196 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 197 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 198 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 199 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 200 lrc[0] = PETSC_FALSE; 201 for (i = 0; i < n; i++) { 202 if (PetscRealPart(vals[i]) > 2.) { 203 lrc[0] = PETSC_TRUE; 204 break; 205 } 206 } 207 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 208 PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 209 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 210 211 /* Get Nedelec field */ 212 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 213 if (pcbddc->n_ISForDofsLocal && field >= 0) { 214 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 215 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 216 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 217 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 218 ne = n; 219 nedfieldlocal = NULL; 220 global = PETSC_TRUE; 221 } else if (field == PETSC_DECIDE) { 222 PetscInt rst, ren, *idx; 223 224 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 225 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 226 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 227 for (i = rst; i < ren; i++) { 228 PetscInt nc; 229 230 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 231 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 232 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 233 } 234 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 235 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 236 PetscCall(PetscMalloc1(n, &idx)); 237 for (i = 0, ne = 0; i < n; i++) 238 if (matis->sf_leafdata[i]) idx[ne++] = i; 239 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 240 } else { 241 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 242 } 243 244 /* Sanity checks */ 245 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 246 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 247 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 248 249 /* Just set primal dofs and return */ 250 if (setprimal) { 251 IS enedfieldlocal; 252 PetscInt *eidxs; 253 254 PetscCall(PetscMalloc1(ne, &eidxs)); 255 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 256 if (nedfieldlocal) { 257 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 258 for (i = 0, cum = 0; i < ne; i++) { 259 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 260 } 261 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 262 } else { 263 for (i = 0, cum = 0; i < ne; i++) { 264 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 265 } 266 } 267 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 268 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 269 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 270 PetscCall(PetscFree(eidxs)); 271 PetscCall(ISDestroy(&nedfieldlocal)); 272 PetscCall(ISDestroy(&enedfieldlocal)); 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 /* Compute some l2g maps */ 277 if (nedfieldlocal) { 278 IS is; 279 280 /* need to map from the local Nedelec field to local numbering */ 281 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 282 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 283 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 284 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 285 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 286 if (global) { 287 PetscCall(PetscObjectReference((PetscObject)al2g)); 288 el2g = al2g; 289 } else { 290 IS gis; 291 292 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 293 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 294 PetscCall(ISDestroy(&gis)); 295 } 296 PetscCall(ISDestroy(&is)); 297 } else { 298 /* one ref for the destruction of al2g, one for el2g */ 299 PetscCall(PetscObjectReference((PetscObject)al2g)); 300 PetscCall(PetscObjectReference((PetscObject)al2g)); 301 el2g = al2g; 302 fl2g = NULL; 303 } 304 305 /* Start communication to drop connections for interior edges (for cc analysis only) */ 306 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 307 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 308 if (nedfieldlocal) { 309 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 310 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 311 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 312 } else { 313 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 314 } 315 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 316 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 317 318 /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting 319 Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */ 320 if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners)); 321 322 /* drop connections with interior edges to avoid unneeded communications and memory movements */ 323 PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view")); 324 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 325 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 326 if (global) { 327 PetscInt rst; 328 329 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 330 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 331 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 332 } 333 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 334 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 335 } else { 336 PetscInt *tbz; 337 338 PetscCall(PetscMalloc1(ne, &tbz)); 339 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 340 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 342 for (i = 0, cum = 0; i < ne; i++) 343 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 344 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 345 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 346 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 347 PetscCall(PetscFree(tbz)); 348 } 349 350 /* Extract subdomain relevant rows of G */ 351 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 352 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 353 PetscCall(MatAIJExtractRows(G, lned, &lGall)); 354 /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */ 355 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 356 PetscCall(ISDestroy(&lned)); 357 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 358 PetscCall(MatDestroy(&lGall)); 359 PetscCall(MatISGetLocalMat(lGis, &lG)); 360 if (matis->allow_repeated) { /* multi-element support */ 361 Mat *lGn, B; 362 IS *is_rows, *tcols, tmap, nmap; 363 PetscInt subnv; 364 const PetscInt *subvidxs; 365 ISLocalToGlobalMapping mapn; 366 367 PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn)); 368 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows)); 369 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols)); 370 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 371 if (fl2g) { 372 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i])); 373 } else { 374 PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i])); 375 is_rows[i] = pcbddc->local_subs[i]; 376 } 377 PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)])); 378 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn)); 379 PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs)); 381 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i])); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs)); 383 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 384 } 385 386 /* Create new MATIS with repeated vertices */ 387 PetscCall(MatCreate(comm, &B)); 388 PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N)); 389 PetscCall(MatSetType(B, MATIS)); 390 PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE)); 391 PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap)); 392 PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap)); 393 PetscCall(ISDestroy(&tmap)); 394 PetscCall(ISGetLocalSize(nmap, &subnv)); 395 PetscCall(ISGetIndices(nmap, &subvidxs)); 396 PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap)); 397 PetscCall(ISRestoreIndices(nmap, &subvidxs)); 398 PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn)); 399 PetscCall(ISDestroy(&tmap)); 400 PetscCall(ISDestroy(&nmap)); 401 PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn)); 402 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 403 PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG)); 404 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 405 PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)])); 406 PetscCall(ISDestroy(&is_rows[i])); 407 PetscCall(ISDestroy(&tcols[i])); 408 } 409 PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG)); 410 PetscCall(PetscFree(lGn)); 411 PetscCall(PetscFree(is_rows)); 412 PetscCall(PetscFree(tcols)); 413 PetscCall(MatISSetLocalMat(B, lG)); 414 PetscCall(MatDestroy(&lG)); 415 416 PetscCall(MatDestroy(&lGis)); 417 lGis = B; 418 419 lGis->assembled = PETSC_TRUE; 420 } 421 PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view")); 422 423 /* SF for nodal dofs communications */ 424 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 425 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 426 PetscCall(PetscObjectReference((PetscObject)vl2g)); 427 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 428 PetscCall(PetscSFCreate(comm, &sfv)); 429 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 430 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 431 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 432 433 if (elements_corners) { 434 IS tmp; 435 Vec global, local; 436 Mat_IS *tGis = (Mat_IS *)lGis->data; 437 438 PetscCall(MatCreateVecs(lGis, &global, NULL)); 439 PetscCall(MatCreateVecs(tGis->A, &local, NULL)); 440 PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp)); 441 PetscCall(VecDestroy(&global)); 442 PetscCall(VecDestroy(&local)); 443 elements_corners = tmp; 444 } 445 446 /* Destroy temporary G */ 447 PetscCall(MatISGetLocalMat(lGis, &lG)); 448 PetscCall(PetscObjectReference((PetscObject)lG)); 449 PetscCall(MatDestroy(&G)); 450 PetscCall(MatDestroy(&lGis)); 451 452 if (print) { 453 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 454 PetscCall(MatView(lG, NULL)); 455 } 456 457 /* Save lG for values insertion in change of basis */ 458 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 459 460 /* Analyze the edge-nodes connections (duplicate lG) */ 461 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 462 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 463 PetscCall(PetscBTCreate(nv, &btv)); 464 PetscCall(PetscBTCreate(ne, &bte)); 465 PetscCall(PetscBTCreate(ne, &btb)); 466 PetscCall(PetscBTCreate(ne, &btbd)); 467 /* need to import the boundary specification to ensure the 468 proper detection of coarse edges' endpoints */ 469 if (pcbddc->DirichletBoundariesLocal) { 470 IS is; 471 472 if (fl2g) { 473 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 474 } else { 475 is = pcbddc->DirichletBoundariesLocal; 476 } 477 PetscCall(ISGetLocalSize(is, &cum)); 478 PetscCall(ISGetIndices(is, &idxs)); 479 for (i = 0; i < cum; i++) { 480 if (idxs[i] >= 0 && idxs[i] < ne) { 481 PetscCall(PetscBTSet(btb, idxs[i])); 482 PetscCall(PetscBTSet(btbd, idxs[i])); 483 } 484 } 485 PetscCall(ISRestoreIndices(is, &idxs)); 486 if (fl2g) PetscCall(ISDestroy(&is)); 487 } 488 if (pcbddc->NeumannBoundariesLocal) { 489 IS is; 490 491 if (fl2g) { 492 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 493 } else { 494 is = pcbddc->NeumannBoundariesLocal; 495 } 496 PetscCall(ISGetLocalSize(is, &cum)); 497 PetscCall(ISGetIndices(is, &idxs)); 498 for (i = 0; i < cum; i++) { 499 if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i])); 500 } 501 PetscCall(ISRestoreIndices(is, &idxs)); 502 if (fl2g) PetscCall(ISDestroy(&is)); 503 } 504 505 /* Count neighs per dof */ 506 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL)); 507 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL)); 508 509 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 510 for proper detection of coarse edges' endpoints */ 511 PetscCall(PetscBTCreate(ne, &btee)); 512 for (i = 0; i < ne; i++) { 513 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 514 } 515 PetscCall(PetscMalloc1(ne, &marks)); 516 if (!conforming) { 517 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 518 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 519 } 520 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 521 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 522 cum = 0; 523 for (i = 0; i < ne; i++) { 524 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 525 if (!PetscBTLookup(btee, i)) { 526 marks[cum++] = i; 527 continue; 528 } 529 /* set badly connected edge dofs as primal */ 530 if (!conforming) { 531 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 532 marks[cum++] = i; 533 PetscCall(PetscBTSet(bte, i)); 534 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 535 } else { 536 /* every edge dofs should be connected through a certain number of nodal dofs 537 to other edge dofs belonging to coarse edges 538 - at most 2 endpoints 539 - order-1 interior nodal dofs 540 - no undefined nodal dofs (nconn < order) 541 */ 542 PetscInt ends = 0, ints = 0, undef = 0; 543 for (j = ii[i]; j < ii[i + 1]; j++) { 544 PetscInt v = jj[j], k; 545 PetscInt nconn = iit[v + 1] - iit[v]; 546 for (k = iit[v]; k < iit[v + 1]; k++) 547 if (!PetscBTLookup(btee, jjt[k])) nconn--; 548 if (nconn > order) ends++; 549 else if (nconn == order) ints++; 550 else undef++; 551 } 552 if (undef || ends > 2 || ints != order - 1) { 553 marks[cum++] = i; 554 PetscCall(PetscBTSet(bte, i)); 555 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i + 1] != ii[i]) { 561 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 562 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 563 } 564 } 565 PetscCall(PetscBTDestroy(&btee)); 566 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 567 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 568 if (!conforming) { 569 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 570 PetscCall(MatDestroy(&lGt)); 571 } 572 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 573 574 /* identify splitpoints and corner candidates */ 575 PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots)); 576 PetscCall(PetscBTCreate(nv, &btvcand)); 577 if (elements_corners) { 578 PetscCall(ISGetLocalSize(elements_corners, &cum)); 579 PetscCall(ISGetIndices(elements_corners, &idxs)); 580 for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i])); 581 PetscCall(ISRestoreIndices(elements_corners, &idxs)); 582 } 583 584 if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */ 585 PetscSF emlsf, vmlsf; 586 PetscInt *eleaves, *vleaves, *meleaves, *mvleaves; 587 PetscInt cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl; 588 589 PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs)); 590 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 591 PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs)); 592 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 593 594 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf)); 595 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf)); 596 597 PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL)); 598 for (i = 0, j = 0; i < ne; i++) j += ecount[i]; 599 PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne); 600 PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j); 601 602 PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL)); 603 for (i = 0, j = 0; i < nv; i++) j += vcount[i]; 604 PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv); 605 PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j); 606 607 PetscCall(PetscMalloc1(ne, &eleaves)); 608 PetscCall(PetscMalloc1(nv, &vleaves)); 609 for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX; 610 for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX; 611 PetscCall(PetscMalloc1(emnl, &meleaves)); 612 PetscCall(PetscMalloc1(vmnl, &mvleaves)); 613 614 PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm)); 615 PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 616 for (i = 0; i < n_subs; i++) { 617 const PetscInt *idxs; 618 const PetscInt subid = cum_subs + i; 619 PetscInt ns; 620 621 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns)); 622 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 623 for (j = 0; j < ns; j++) { 624 const PetscInt e = idxs[j]; 625 626 eleaves[e] = subid; 627 for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid; 628 } 629 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 630 } 631 PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 632 PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 633 PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 634 PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 635 PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 636 PetscCall(PetscFree(eleaves)); 637 PetscCall(PetscFree(vleaves)); 638 639 PetscCall(PetscMalloc1(ne + 1, &eneighs)); 640 eneighs[0] = meleaves; 641 for (i = 0; i < ne; i++) { 642 PetscCall(PetscSortInt(ecount[i], eneighs[i])); 643 eneighs[i + 1] = eneighs[i] + ecount[i]; 644 } 645 PetscCall(PetscMalloc1(nv + 1, &vneighs)); 646 vneighs[0] = mvleaves; 647 for (i = 0; i < nv; i++) { 648 PetscCall(PetscSortInt(vcount[i], vneighs[i])); 649 vneighs[i + 1] = vneighs[i] + vcount[i]; 650 } 651 } else { 652 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs)); 653 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs)); 654 } 655 656 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 657 if (print) { 658 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 659 PetscCall(MatView(lGe, NULL)); 660 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 661 PetscCall(MatView(lGt, NULL)); 662 } 663 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 664 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 665 for (i = 0; i < nv; i++) { 666 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 667 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 668 if (!order) { /* variable order */ 669 PetscReal vorder = 0.; 670 671 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 672 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 673 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 674 ord = 1; 675 } 676 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 677 const PetscInt e = jj[j]; 678 679 if (PetscBTLookup(btbd, e)) { 680 bdir = PETSC_TRUE; 681 break; 682 } 683 if (vc != ecount[e]) { 684 sneighs = PETSC_FALSE; 685 } else { 686 const PetscInt *vn = vneighs[i], *en = eneighs[e]; 687 688 for (PetscInt k = 0; k < vc; k++) { 689 if (vn[k] != en[k]) { 690 sneighs = PETSC_FALSE; 691 break; 692 } 693 } 694 } 695 } 696 if (elements_corners) test = 0; 697 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 698 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 699 PetscCall(PetscBTSet(btv, i)); 700 } else if (test == ord) { 701 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 702 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 703 PetscCall(PetscBTSet(btv, i)); 704 } else if (!elements_corners) { 705 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 706 PetscCall(PetscBTSet(btvcand, i)); 707 } 708 } 709 } 710 PetscCall(PetscBTDestroy(&btbd)); 711 712 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 713 if (order != 1) { 714 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 715 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 716 for (i = 0; i < nv; i++) { 717 if (PetscBTLookup(btvcand, i)) { 718 PetscBool found = PETSC_FALSE; 719 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 720 PetscInt k, e = jj[j]; 721 if (PetscBTLookup(bte, e)) continue; 722 for (k = iit[e]; k < iit[e + 1]; k++) { 723 PetscInt v = jjt[k]; 724 if (v != i && PetscBTLookup(btvcand, v)) { 725 found = PETSC_TRUE; 726 break; 727 } 728 } 729 } 730 if (!found) { 731 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 732 PetscCall(PetscBTClear(btvcand, i)); 733 } else { 734 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 735 } 736 } 737 } 738 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 739 } 740 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 741 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 742 PetscCall(MatDestroy(&lGe)); 743 744 /* Get the local G^T explicitly */ 745 PetscCall(MatDestroy(&lGt)); 746 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 747 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 748 749 /* Mark shared nodal dofs */ 750 PetscCall(PetscBTCreate(nv, &btvi)); 751 for (i = 0; i < nv; i++) { 752 if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i)); 753 } 754 755 if (matis->allow_repeated) { 756 PetscCall(PetscFree(eneighs[0])); 757 PetscCall(PetscFree(vneighs[0])); 758 PetscCall(PetscFree(eneighs)); 759 PetscCall(PetscFree(vneighs)); 760 } 761 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 762 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 763 764 /* communicate corners and splitpoints */ 765 PetscCall(PetscMalloc1(nv, &vmarks)); 766 PetscCall(PetscArrayzero(sfvleaves, nv)); 767 PetscCall(PetscArrayzero(sfvroots, Lv)); 768 for (i = 0; i < nv; i++) 769 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 770 771 if (print) { 772 IS tbz; 773 774 cum = 0; 775 for (i = 0; i < nv; i++) 776 if (sfvleaves[i]) vmarks[cum++] = i; 777 778 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 779 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 780 PetscCall(ISView(tbz, NULL)); 781 PetscCall(ISDestroy(&tbz)); 782 } 783 784 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 785 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 786 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 787 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 788 789 /* Zero rows of lGt corresponding to identified corners 790 and interior nodal dofs */ 791 cum = 0; 792 for (i = 0; i < nv; i++) { 793 if (sfvleaves[i]) { 794 vmarks[cum++] = i; 795 PetscCall(PetscBTSet(btv, i)); 796 } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 797 } 798 PetscCall(PetscBTDestroy(&btvi)); 799 if (print) { 800 IS tbz; 801 802 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 803 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 804 PetscCall(ISView(tbz, NULL)); 805 PetscCall(ISDestroy(&tbz)); 806 } 807 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 808 PetscCall(PetscFree(vmarks)); 809 PetscCall(PetscSFDestroy(&sfv)); 810 PetscCall(PetscFree2(sfvleaves, sfvroots)); 811 812 /* Recompute G */ 813 PetscCall(MatDestroy(&lG)); 814 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 815 if (print) { 816 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 817 PetscCall(MatView(lG, NULL)); 818 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 819 PetscCall(MatView(lGt, NULL)); 820 } 821 822 /* Get primal dofs (if any) */ 823 cum = 0; 824 for (i = 0; i < ne; i++) { 825 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 826 } 827 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 828 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 829 if (print) { 830 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 831 PetscCall(ISView(primals, NULL)); 832 } 833 PetscCall(PetscBTDestroy(&bte)); 834 /* TODO: what if the user passed in some of them ? */ 835 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 836 PetscCall(ISDestroy(&primals)); 837 838 /* Compute edge connectivity */ 839 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 840 841 /* Symbolic conn = lG*lGt */ 842 if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */ 843 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 844 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 845 PetscCall(MatProductSetAlgorithm(conn, "default")); 846 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 847 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 848 PetscCall(MatProductSetFromOptions(conn)); 849 PetscCall(MatProductSymbolic(conn)); 850 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 851 if (fl2g) { 852 PetscBT btf; 853 PetscInt *iia, *jja, *iiu, *jju; 854 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 855 856 /* create CSR for all local dofs */ 857 PetscCall(PetscMalloc1(n + 1, &iia)); 858 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 859 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 860 iiu = pcbddc->mat_graph->xadj; 861 jju = pcbddc->mat_graph->adjncy; 862 } else if (pcbddc->use_local_adj) { 863 rest = PETSC_TRUE; 864 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 865 } else { 866 free = PETSC_TRUE; 867 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 868 iiu[0] = 0; 869 for (i = 0; i < n; i++) { 870 iiu[i + 1] = i + 1; 871 jju[i] = -1; 872 } 873 } 874 875 /* import sizes of CSR */ 876 iia[0] = 0; 877 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 878 879 /* overwrite entries corresponding to the Nedelec field */ 880 PetscCall(PetscBTCreate(n, &btf)); 881 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 882 for (i = 0; i < ne; i++) { 883 PetscCall(PetscBTSet(btf, idxs[i])); 884 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 885 } 886 887 /* iia in CSR */ 888 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 889 890 /* jja in CSR */ 891 PetscCall(PetscMalloc1(iia[n], &jja)); 892 for (i = 0; i < n; i++) 893 if (!PetscBTLookup(btf, i)) 894 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 895 896 /* map edge dofs connectivity */ 897 if (jj) { 898 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 899 for (i = 0; i < ne; i++) { 900 PetscInt e = idxs[i]; 901 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 902 } 903 } 904 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 905 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES)); 906 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 907 if (free) PetscCall(PetscFree2(iiu, jju)); 908 PetscCall(PetscBTDestroy(&btf)); 909 } else { 910 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES)); 911 } 912 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 913 PetscCall(MatDestroy(&conn)); 914 } 915 916 /* Analyze interface for edge dofs */ 917 PetscCall(PCBDDCAnalyzeInterface(pc)); 918 pcbddc->mat_graph->twodim = PETSC_FALSE; 919 920 /* Get coarse edges in the edge space */ 921 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 922 923 if (fl2g) { 924 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 925 PetscCall(PetscMalloc1(nee, &eedges)); 926 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 927 } else { 928 eedges = alleedges; 929 primals = allprimals; 930 } 931 932 /* Mark fine edge dofs with their coarse edge id */ 933 PetscCall(PetscArrayzero(marks, ne)); 934 PetscCall(ISGetLocalSize(primals, &cum)); 935 PetscCall(ISGetIndices(primals, &idxs)); 936 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 937 PetscCall(ISRestoreIndices(primals, &idxs)); 938 if (print) { 939 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 940 PetscCall(ISView(primals, NULL)); 941 } 942 943 maxsize = 0; 944 for (i = 0; i < nee; i++) { 945 PetscInt size, mark = i + 1; 946 947 PetscCall(ISGetLocalSize(eedges[i], &size)); 948 PetscCall(ISGetIndices(eedges[i], &idxs)); 949 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 950 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 951 maxsize = PetscMax(maxsize, size); 952 } 953 954 /* Find coarse edge endpoints */ 955 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 956 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 957 for (i = 0; i < nee; i++) { 958 PetscInt mark = i + 1, size; 959 960 PetscCall(ISGetLocalSize(eedges[i], &size)); 961 if (!size && nedfieldlocal) continue; 962 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 963 PetscCall(ISGetIndices(eedges[i], &idxs)); 964 if (print) { 965 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 966 PetscCall(ISView(eedges[i], NULL)); 967 } 968 for (j = 0; j < size; j++) { 969 PetscInt k, ee = idxs[j]; 970 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 971 for (k = ii[ee]; k < ii[ee + 1]; k++) { 972 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 973 if (PetscBTLookup(btv, jj[k])) { 974 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 975 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 976 PetscInt k2; 977 PetscBool corner = PETSC_FALSE; 978 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 979 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 980 /* it's a corner if either is connected with an edge dof belonging to a different cc or 981 if the edge dof lie on the natural part of the boundary */ 982 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 983 corner = PETSC_TRUE; 984 break; 985 } 986 } 987 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 988 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 989 PetscCall(PetscBTSet(btv, jj[k])); 990 } else { 991 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 992 } 993 } 994 } 995 } 996 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 997 } 998 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 999 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1000 PetscCall(PetscBTDestroy(&btb)); 1001 1002 /* Reset marked primal dofs */ 1003 PetscCall(ISGetLocalSize(primals, &cum)); 1004 PetscCall(ISGetIndices(primals, &idxs)); 1005 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 1006 PetscCall(ISRestoreIndices(primals, &idxs)); 1007 1008 /* Now use the initial lG */ 1009 PetscCall(MatDestroy(&lG)); 1010 PetscCall(MatDestroy(&lGt)); 1011 lG = lGinit; 1012 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 1013 1014 /* Compute extended cols indices */ 1015 PetscCall(PetscBTCreate(nv, &btvc)); 1016 PetscCall(PetscBTCreate(nee, &bter)); 1017 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1018 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 1019 i *= maxsize; 1020 PetscCall(PetscCalloc1(nee, &extcols)); 1021 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 1022 eerr = PETSC_FALSE; 1023 for (i = 0; i < nee; i++) { 1024 PetscInt size, found = 0; 1025 1026 cum = 0; 1027 PetscCall(ISGetLocalSize(eedges[i], &size)); 1028 if (!size && nedfieldlocal) continue; 1029 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1030 PetscCall(ISGetIndices(eedges[i], &idxs)); 1031 PetscCall(PetscBTMemzero(nv, btvc)); 1032 for (j = 0; j < size; j++) { 1033 PetscInt k, ee = idxs[j]; 1034 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1035 PetscInt vv = jj[k]; 1036 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 1037 else if (!PetscBTLookupSet(btvc, vv)) found++; 1038 } 1039 } 1040 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1041 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1042 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1043 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1044 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1045 /* it may happen that endpoints are not defined at this point 1046 if it is the case, mark this edge for a second pass */ 1047 if (cum != size - 1 || found != 2) { 1048 PetscCall(PetscBTSet(bter, i)); 1049 if (print) { 1050 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 1051 PetscCall(ISView(eedges[i], NULL)); 1052 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 1053 PetscCall(ISView(extcols[i], NULL)); 1054 } 1055 eerr = PETSC_TRUE; 1056 } 1057 } 1058 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 1059 PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 1060 if (done) { 1061 PetscInt *newprimals; 1062 1063 PetscCall(PetscMalloc1(ne, &newprimals)); 1064 PetscCall(ISGetLocalSize(primals, &cum)); 1065 PetscCall(ISGetIndices(primals, &idxs)); 1066 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 1067 PetscCall(ISRestoreIndices(primals, &idxs)); 1068 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1069 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 1070 for (i = 0; i < nee; i++) { 1071 PetscBool has_candidates = PETSC_FALSE; 1072 if (PetscBTLookup(bter, i)) { 1073 PetscInt size, mark = i + 1; 1074 1075 PetscCall(ISGetLocalSize(eedges[i], &size)); 1076 PetscCall(ISGetIndices(eedges[i], &idxs)); 1077 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1078 for (j = 0; j < size; j++) { 1079 PetscInt k, ee = idxs[j]; 1080 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 1081 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1082 /* set all candidates located on the edge as corners */ 1083 if (PetscBTLookup(btvcand, jj[k])) { 1084 PetscInt k2, vv = jj[k]; 1085 has_candidates = PETSC_TRUE; 1086 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 1087 PetscCall(PetscBTSet(btv, vv)); 1088 /* set all edge dofs connected to candidate as primals */ 1089 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 1090 if (marks[jjt[k2]] == mark) { 1091 PetscInt k3, ee2 = jjt[k2]; 1092 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 1093 newprimals[cum++] = ee2; 1094 /* finally set the new corners */ 1095 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 1096 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 1097 PetscCall(PetscBTSet(btv, jj[k3])); 1098 } 1099 } 1100 } 1101 } else { 1102 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 1103 } 1104 } 1105 } 1106 if (!has_candidates) { /* circular edge */ 1107 PetscInt k, ee = idxs[0], *tmarks; 1108 1109 PetscCall(PetscCalloc1(ne, &tmarks)); 1110 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 1111 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1112 PetscInt k2; 1113 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 1114 PetscCall(PetscBTSet(btv, jj[k])); 1115 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 1116 } 1117 for (j = 0; j < size; j++) { 1118 if (tmarks[idxs[j]] > 1) { 1119 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 1120 newprimals[cum++] = idxs[j]; 1121 } 1122 } 1123 PetscCall(PetscFree(tmarks)); 1124 } 1125 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1126 } 1127 PetscCall(ISDestroy(&extcols[i])); 1128 } 1129 PetscCall(PetscFree(extcols)); 1130 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1131 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 1132 if (fl2g) { 1133 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 1134 PetscCall(ISDestroy(&primals)); 1135 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1136 PetscCall(PetscFree(eedges)); 1137 } 1138 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1139 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 1140 PetscCall(PetscFree(newprimals)); 1141 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 1142 PetscCall(ISDestroy(&primals)); 1143 PetscCall(PCBDDCAnalyzeInterface(pc)); 1144 pcbddc->mat_graph->twodim = PETSC_FALSE; 1145 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1146 if (fl2g) { 1147 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 1148 PetscCall(PetscMalloc1(nee, &eedges)); 1149 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 1150 } else { 1151 eedges = alleedges; 1152 primals = allprimals; 1153 } 1154 PetscCall(PetscCalloc1(nee, &extcols)); 1155 1156 /* Mark again */ 1157 PetscCall(PetscArrayzero(marks, ne)); 1158 for (i = 0; i < nee; i++) { 1159 PetscInt size, mark = i + 1; 1160 1161 PetscCall(ISGetLocalSize(eedges[i], &size)); 1162 PetscCall(ISGetIndices(eedges[i], &idxs)); 1163 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1164 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1165 } 1166 if (print) { 1167 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1168 PetscCall(ISView(primals, NULL)); 1169 } 1170 1171 /* Recompute extended cols */ 1172 eerr = PETSC_FALSE; 1173 for (i = 0; i < nee; i++) { 1174 PetscInt size; 1175 1176 cum = 0; 1177 PetscCall(ISGetLocalSize(eedges[i], &size)); 1178 if (!size && nedfieldlocal) continue; 1179 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1180 PetscCall(ISGetIndices(eedges[i], &idxs)); 1181 for (j = 0; j < size; j++) { 1182 PetscInt k, ee = idxs[j]; 1183 for (k = ii[ee]; k < ii[ee + 1]; k++) 1184 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1185 } 1186 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1187 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1188 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1189 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1190 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1191 if (cum != size - 1) { 1192 if (print) { 1193 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1194 PetscCall(ISView(eedges[i], NULL)); 1195 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1196 PetscCall(ISView(extcols[i], NULL)); 1197 } 1198 eerr = PETSC_TRUE; 1199 } 1200 } 1201 } 1202 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1203 PetscCall(PetscFree2(extrow, gidxs)); 1204 PetscCall(PetscBTDestroy(&bter)); 1205 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1206 /* an error should not occur at this point */ 1207 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1208 1209 /* Check the number of endpoints */ 1210 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1211 PetscCall(PetscMalloc1(2 * nee, &corners)); 1212 PetscCall(PetscMalloc1(nee, &cedges)); 1213 for (i = 0; i < nee; i++) { 1214 PetscInt size, found = 0, gc[2]; 1215 1216 /* init with defaults */ 1217 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1218 PetscCall(ISGetLocalSize(eedges[i], &size)); 1219 if (!size && nedfieldlocal) continue; 1220 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1221 PetscCall(ISGetIndices(eedges[i], &idxs)); 1222 PetscCall(PetscBTMemzero(nv, btvc)); 1223 for (j = 0; j < size; j++) { 1224 PetscInt k, ee = idxs[j]; 1225 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1226 PetscInt vv = jj[k]; 1227 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1228 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i); 1229 corners[i * 2 + found++] = vv; 1230 } 1231 } 1232 } 1233 if (found != 2) { 1234 PetscInt e; 1235 if (fl2g) { 1236 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1237 } else { 1238 e = idxs[0]; 1239 } 1240 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1241 } 1242 1243 /* get primal dof index on this coarse edge */ 1244 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1245 if (gc[0] > gc[1]) { 1246 PetscInt swap = corners[2 * i]; 1247 corners[2 * i] = corners[2 * i + 1]; 1248 corners[2 * i + 1] = swap; 1249 } 1250 cedges[i] = idxs[size - 1]; 1251 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1252 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1253 } 1254 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1255 PetscCall(PetscBTDestroy(&btvc)); 1256 1257 if (PetscDefined(USE_DEBUG)) { 1258 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1259 not interfere with neighbouring coarse edges */ 1260 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1261 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1262 for (i = 0; i < nv; i++) { 1263 PetscInt emax = 0, eemax = 0; 1264 1265 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1266 PetscCall(PetscArrayzero(emarks, nee + 1)); 1267 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1268 for (j = 1; j < nee + 1; j++) { 1269 if (emax < emarks[j]) { 1270 emax = emarks[j]; 1271 eemax = j; 1272 } 1273 } 1274 /* not relevant for edges */ 1275 if (!eemax) continue; 1276 1277 for (j = ii[i]; j < ii[i + 1]; j++) { 1278 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1279 } 1280 } 1281 PetscCall(PetscFree(emarks)); 1282 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1283 } 1284 1285 /* Compute extended rows indices for edge blocks of the change of basis */ 1286 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1287 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1288 extmem *= maxsize; 1289 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1290 PetscCall(PetscMalloc1(nee, &extrows)); 1291 PetscCall(PetscCalloc1(nee, &extrowcum)); 1292 for (i = 0; i < nv; i++) { 1293 PetscInt mark = 0, size, start; 1294 1295 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1296 for (j = ii[i]; j < ii[i + 1]; j++) 1297 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1298 1299 /* not relevant */ 1300 if (!mark) continue; 1301 1302 /* import extended row */ 1303 mark--; 1304 start = mark * extmem + extrowcum[mark]; 1305 size = ii[i + 1] - ii[i]; 1306 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1307 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1308 extrowcum[mark] += size; 1309 } 1310 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1311 PetscCall(MatDestroy(&lGt)); 1312 PetscCall(PetscFree(marks)); 1313 1314 /* Compress extrows */ 1315 cum = 0; 1316 for (i = 0; i < nee; i++) { 1317 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1318 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1319 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1320 cum = PetscMax(cum, size); 1321 } 1322 PetscCall(PetscFree(extrowcum)); 1323 PetscCall(PetscBTDestroy(&btv)); 1324 PetscCall(PetscBTDestroy(&btvcand)); 1325 1326 /* Workspace for lapack inner calls and VecSetValues */ 1327 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1328 1329 /* Create change of basis matrix (preallocation can be improved) */ 1330 PetscCall(MatCreate(comm, &T)); 1331 PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap)); 1332 PetscCall(MatSetType(T, MATAIJ)); 1333 PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL)); 1334 PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL)); 1335 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1336 PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1337 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1338 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1339 1340 /* Defaults to identity */ 1341 for (i = pc->mat->rmap->rstart; i < pc->mat->rmap->rend; i++) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES)); 1342 1343 /* Create discrete gradient for the coarser level if needed */ 1344 PetscCall(MatDestroy(&pcbddc->nedcG)); 1345 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1346 if (pcbddc->current_level < pcbddc->max_levels) { 1347 ISLocalToGlobalMapping cel2g, cvl2g; 1348 IS wis, gwis; 1349 PetscInt cnv, cne; 1350 1351 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1352 if (fl2g) { 1353 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1354 } else { 1355 PetscCall(PetscObjectReference((PetscObject)wis)); 1356 pcbddc->nedclocal = wis; 1357 } 1358 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1359 PetscCall(ISDestroy(&wis)); 1360 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1361 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1362 PetscCall(ISDestroy(&wis)); 1363 PetscCall(ISDestroy(&gwis)); 1364 1365 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1366 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1367 PetscCall(ISDestroy(&wis)); 1368 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1369 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1370 PetscCall(ISDestroy(&wis)); 1371 PetscCall(ISDestroy(&gwis)); 1372 1373 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1374 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1375 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1376 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1377 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1378 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1379 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1380 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1381 } 1382 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1383 1384 #if defined(PRINT_GDET) 1385 inc = 0; 1386 lev = pcbddc->current_level; 1387 #endif 1388 1389 /* Insert values in the change of basis matrix */ 1390 for (i = 0; i < nee; i++) { 1391 Mat Gins = NULL, GKins = NULL; 1392 IS cornersis = NULL; 1393 PetscScalar cvals[2]; 1394 1395 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1396 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1397 if (Gins && GKins) { 1398 const PetscScalar *data; 1399 const PetscInt *rows, *cols; 1400 PetscInt nrh, nch, nrc, ncc; 1401 1402 PetscCall(ISGetIndices(eedges[i], &cols)); 1403 /* H1 */ 1404 PetscCall(ISGetIndices(extrows[i], &rows)); 1405 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1406 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1407 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1408 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1409 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1410 /* complement */ 1411 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1412 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1413 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1414 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1415 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1416 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1417 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1418 1419 /* coarse discrete gradient */ 1420 if (pcbddc->nedcG) { 1421 PetscInt cols[2]; 1422 1423 cols[0] = 2 * i; 1424 cols[1] = 2 * i + 1; 1425 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1426 } 1427 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1428 } 1429 PetscCall(ISDestroy(&extrows[i])); 1430 PetscCall(ISDestroy(&extcols[i])); 1431 PetscCall(ISDestroy(&cornersis)); 1432 PetscCall(MatDestroy(&Gins)); 1433 PetscCall(MatDestroy(&GKins)); 1434 } 1435 1436 /* for FDM element-by-element: first dof on the edge only constraint. Why? */ 1437 if (elements_corners && pcbddc->mat_graph->multi_element) { 1438 ISLocalToGlobalMapping map; 1439 MatNullSpace nnsp; 1440 Vec quad_vec; 1441 1442 PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL)); 1443 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp)); 1444 PetscCall(VecLockReadPop(quad_vec)); 1445 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 1446 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1447 for (i = 0; i < nee; i++) { 1448 const PetscInt *idxs; 1449 PetscScalar one = 1.0; 1450 1451 PetscCall(ISGetLocalSize(alleedges[i], &cum)); 1452 if (!cum) continue; 1453 PetscCall(ISGetIndices(alleedges[i], &idxs)); 1454 PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES)); 1455 PetscCall(ISRestoreIndices(alleedges[i], &idxs)); 1456 } 1457 PetscCall(VecLockReadPush(quad_vec)); 1458 PetscCall(VecDestroy(&quad_vec)); 1459 PetscCall(MatSetNearNullSpace(pc->pmat, nnsp)); 1460 PetscCall(MatNullSpaceDestroy(&nnsp)); 1461 } 1462 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1463 1464 /* Start assembling */ 1465 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1466 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1467 1468 /* Free */ 1469 if (fl2g) { 1470 PetscCall(ISDestroy(&primals)); 1471 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1472 PetscCall(PetscFree(eedges)); 1473 } 1474 1475 /* hack mat_graph with primal dofs on the coarse edges */ 1476 { 1477 PCBDDCGraph graph = pcbddc->mat_graph; 1478 PetscInt *oqueue = graph->queue; 1479 PetscInt *ocptr = graph->cptr; 1480 PetscInt ncc, *idxs; 1481 1482 /* find first primal edge */ 1483 if (pcbddc->nedclocal) { 1484 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1485 } else { 1486 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1487 idxs = cedges; 1488 } 1489 cum = 0; 1490 while (cum < nee && cedges[cum] < 0) cum++; 1491 1492 /* adapt connected components */ 1493 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1494 graph->cptr[0] = 0; 1495 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1496 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1497 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1498 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1499 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1500 ncc++; 1501 lc--; 1502 cum++; 1503 while (cum < nee && cedges[cum] < 0) cum++; 1504 } 1505 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1506 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1507 ncc++; 1508 } 1509 graph->ncc = ncc; 1510 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1511 PetscCall(PetscFree2(ocptr, oqueue)); 1512 } 1513 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1514 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1515 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1516 1517 PetscCall(ISDestroy(&nedfieldlocal)); 1518 PetscCall(PetscFree(extrow)); 1519 PetscCall(PetscFree2(work, rwork)); 1520 PetscCall(PetscFree(corners)); 1521 PetscCall(PetscFree(cedges)); 1522 PetscCall(PetscFree(extrows)); 1523 PetscCall(PetscFree(extcols)); 1524 PetscCall(MatDestroy(&lG)); 1525 1526 /* Complete assembling */ 1527 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1528 PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view")); 1529 if (pcbddc->nedcG) { 1530 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1531 PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view")); 1532 } 1533 1534 PetscCall(ISDestroy(&elements_corners)); 1535 1536 /* set change of basis */ 1537 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE)); 1538 PetscCall(MatDestroy(&T)); 1539 PetscFunctionReturn(PETSC_SUCCESS); 1540 } 1541 1542 /* the near-null space of BDDC carries information on quadrature weights, 1543 and these can be collinear -> so cheat with MatNullSpaceCreate 1544 and create a suitable set of basis vectors first */ 1545 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1546 { 1547 PetscInt i; 1548 1549 PetscFunctionBegin; 1550 for (i = 0; i < nvecs; i++) { 1551 PetscInt first, last; 1552 1553 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1554 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1555 if (i >= first && i < last) { 1556 PetscScalar *data; 1557 PetscCall(VecGetArray(quad_vecs[i], &data)); 1558 if (!has_const) { 1559 data[i - first] = 1.; 1560 } else { 1561 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1562 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1563 } 1564 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1565 } 1566 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1567 } 1568 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1569 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1570 PetscInt first, last; 1571 PetscCall(VecLockReadPop(quad_vecs[i])); 1572 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1573 if (i >= first && i < last) { 1574 PetscScalar *data; 1575 PetscCall(VecGetArray(quad_vecs[i], &data)); 1576 if (!has_const) { 1577 data[i - first] = 0.; 1578 } else { 1579 data[2 * i - first] = 0.; 1580 data[2 * i - first + 1] = 0.; 1581 } 1582 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1583 } 1584 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1585 PetscCall(VecLockReadPush(quad_vecs[i])); 1586 } 1587 PetscFunctionReturn(PETSC_SUCCESS); 1588 } 1589 1590 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1591 { 1592 Mat loc_divudotp; 1593 Vec p, v, quad_vec; 1594 ISLocalToGlobalMapping map; 1595 PetscScalar *array; 1596 1597 PetscFunctionBegin; 1598 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1599 if (!transpose) { 1600 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1601 } else { 1602 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1603 } 1604 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp)); 1605 PetscCall(VecLockReadPop(quad_vec)); 1606 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1607 1608 /* compute local quad vec */ 1609 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1610 if (!transpose) { 1611 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1612 } else { 1613 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1614 } 1615 /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */ 1616 PetscCall(VecSet(p, 1.)); 1617 if (!transpose) { 1618 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1619 } else { 1620 PetscCall(MatMult(loc_divudotp, p, v)); 1621 } 1622 PetscCall(VecDestroy(&p)); 1623 if (vl2l) { 1624 Mat lA; 1625 VecScatter sc; 1626 Vec vins; 1627 1628 PetscCall(MatISGetLocalMat(A, &lA)); 1629 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1630 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1631 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1632 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1633 PetscCall(VecScatterDestroy(&sc)); 1634 PetscCall(VecDestroy(&v)); 1635 v = vins; 1636 } 1637 1638 /* mask summation of interface values */ 1639 PetscInt n, *mmask, *mask, *idxs, nmr, nr; 1640 const PetscInt *degree; 1641 PetscSF msf; 1642 1643 PetscCall(VecGetLocalSize(v, &n)); 1644 PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL)); 1645 PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf)); 1646 PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL)); 1647 PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs)); 1648 PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, °ree)); 1649 PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, °ree)); 1650 for (PetscInt i = 0, c = 0; i < nr; i++) { 1651 mmask[c] = 1; 1652 c += degree[i]; 1653 } 1654 PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1655 PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1656 PetscCall(VecGetArray(v, &array)); 1657 for (PetscInt i = 0; i < n; i++) { 1658 array[i] *= mask[i]; 1659 idxs[i] = i; 1660 } 1661 PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES)); 1662 PetscCall(VecRestoreArray(v, &array)); 1663 PetscCall(PetscFree3(mmask, mask, idxs)); 1664 PetscCall(VecDestroy(&v)); 1665 PetscCall(VecAssemblyBegin(quad_vec)); 1666 PetscCall(VecAssemblyEnd(quad_vec)); 1667 PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view")); 1668 PetscCall(VecLockReadPush(quad_vec)); 1669 PetscCall(VecDestroy(&quad_vec)); 1670 PetscFunctionReturn(PETSC_SUCCESS); 1671 } 1672 1673 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1674 { 1675 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1676 1677 PetscFunctionBegin; 1678 if (primalv) { 1679 if (pcbddc->user_primal_vertices_local) { 1680 IS list[2], newp; 1681 1682 list[0] = primalv; 1683 list[1] = pcbddc->user_primal_vertices_local; 1684 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1685 PetscCall(ISSortRemoveDups(newp)); 1686 PetscCall(ISDestroy(&list[1])); 1687 pcbddc->user_primal_vertices_local = newp; 1688 } else { 1689 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1690 } 1691 } 1692 PetscFunctionReturn(PETSC_SUCCESS); 1693 } 1694 1695 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1696 { 1697 PetscInt f, *comp = (PetscInt *)ctx; 1698 1699 PetscFunctionBegin; 1700 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1701 PetscFunctionReturn(PETSC_SUCCESS); 1702 } 1703 1704 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1705 { 1706 Vec local, global; 1707 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1708 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1709 PetscBool monolithic = PETSC_FALSE; 1710 1711 PetscFunctionBegin; 1712 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1713 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1714 PetscOptionsEnd(); 1715 /* need to convert from global to local topology information and remove references to information in global ordering */ 1716 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1717 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1718 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1719 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1720 if (monolithic) { /* just get block size to properly compute vertices */ 1721 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1722 goto boundary; 1723 } 1724 1725 if (pcbddc->user_provided_isfordofs) { 1726 if (pcbddc->n_ISForDofs) { 1727 PetscInt i; 1728 1729 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1730 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1731 PetscInt bs; 1732 1733 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1734 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1735 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1736 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1737 } 1738 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1739 pcbddc->n_ISForDofs = 0; 1740 PetscCall(PetscFree(pcbddc->ISForDofs)); 1741 } 1742 } else { 1743 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1744 DM dm; 1745 1746 PetscCall(MatGetDM(pc->pmat, &dm)); 1747 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1748 if (dm) { 1749 IS *fields; 1750 PetscInt nf, i; 1751 1752 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1753 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1754 for (i = 0; i < nf; i++) { 1755 PetscInt bs; 1756 1757 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1758 PetscCall(ISGetBlockSize(fields[i], &bs)); 1759 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1760 PetscCall(ISDestroy(&fields[i])); 1761 } 1762 PetscCall(PetscFree(fields)); 1763 pcbddc->n_ISForDofsLocal = nf; 1764 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1765 PetscContainer c; 1766 1767 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1768 if (c) { 1769 MatISLocalFields lf; 1770 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1771 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1772 } else { /* fallback, create the default fields if bs > 1 */ 1773 PetscInt i, n = matis->A->rmap->n; 1774 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1775 if (i > 1) { 1776 pcbddc->n_ISForDofsLocal = i; 1777 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1778 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1779 } 1780 } 1781 } 1782 } else { 1783 PetscInt i; 1784 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1785 } 1786 } 1787 1788 boundary: 1789 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1790 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1791 } else if (pcbddc->DirichletBoundariesLocal) { 1792 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1793 } 1794 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1795 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1796 } else if (pcbddc->NeumannBoundariesLocal) { 1797 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1798 } 1799 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1800 PetscCall(VecDestroy(&global)); 1801 PetscCall(VecDestroy(&local)); 1802 /* detect local disconnected subdomains if requested or needed */ 1803 if (pcbddc->detect_disconnected || matis->allow_repeated) { 1804 IS primalv = NULL; 1805 PetscInt nel; 1806 PetscBool filter = pcbddc->detect_disconnected_filter; 1807 1808 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1809 PetscCall(PetscFree(pcbddc->local_subs)); 1810 PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL)); 1811 if (matis->allow_repeated && nel) { 1812 const PetscInt *elsizes; 1813 1814 pcbddc->n_local_subs = nel; 1815 PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes)); 1816 PetscCall(PetscMalloc1(nel, &pcbddc->local_subs)); 1817 for (PetscInt i = 0, c = 0; i < nel; i++) { 1818 PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i])); 1819 c += elsizes[i]; 1820 } 1821 } else { 1822 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1823 } 1824 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1825 PetscCall(ISDestroy(&primalv)); 1826 } 1827 /* early stage corner detection */ 1828 { 1829 DM dm; 1830 1831 PetscCall(MatGetDM(pc->pmat, &dm)); 1832 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1833 if (dm) { 1834 PetscBool isda; 1835 1836 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1837 if (isda) { 1838 ISLocalToGlobalMapping l2l; 1839 IS corners; 1840 Mat lA; 1841 PetscBool gl, lo; 1842 1843 { 1844 Vec cvec; 1845 const PetscScalar *coords; 1846 PetscInt dof, n, cdim; 1847 PetscBool memc = PETSC_TRUE; 1848 1849 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1850 PetscCall(DMGetCoordinates(dm, &cvec)); 1851 PetscCall(VecGetLocalSize(cvec, &n)); 1852 PetscCall(VecGetBlockSize(cvec, &cdim)); 1853 n /= cdim; 1854 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1855 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1856 PetscCall(VecGetArrayRead(cvec, &coords)); 1857 #if defined(PETSC_USE_COMPLEX) 1858 memc = PETSC_FALSE; 1859 #endif 1860 if (dof != 1) memc = PETSC_FALSE; 1861 if (memc) { 1862 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1863 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1864 PetscReal *bcoords = pcbddc->mat_graph->coords; 1865 PetscInt i, b, d; 1866 1867 for (i = 0; i < n; i++) { 1868 for (b = 0; b < dof; b++) { 1869 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1870 } 1871 } 1872 } 1873 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1874 pcbddc->mat_graph->cdim = cdim; 1875 pcbddc->mat_graph->cnloc = dof * n; 1876 pcbddc->mat_graph->cloc = PETSC_FALSE; 1877 } 1878 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1879 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1880 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1881 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1882 lo = (PetscBool)(l2l && corners); 1883 PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1884 if (gl) { /* From PETSc's DMDA */ 1885 const PetscInt *idx; 1886 PetscInt dof, bs, *idxout, n; 1887 1888 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1889 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1890 PetscCall(ISGetLocalSize(corners, &n)); 1891 PetscCall(ISGetIndices(corners, &idx)); 1892 if (bs == dof) { 1893 PetscCall(PetscMalloc1(n, &idxout)); 1894 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1895 } else { /* the original DMDA local-to-local map have been modified */ 1896 PetscInt i, d; 1897 1898 PetscCall(PetscMalloc1(dof * n, &idxout)); 1899 for (i = 0; i < n; i++) 1900 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1901 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1902 1903 bs = 1; 1904 n *= dof; 1905 } 1906 PetscCall(ISRestoreIndices(corners, &idx)); 1907 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1908 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1909 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1910 PetscCall(ISDestroy(&corners)); 1911 pcbddc->corner_selected = PETSC_TRUE; 1912 pcbddc->corner_selection = PETSC_TRUE; 1913 } 1914 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1915 } 1916 } 1917 } 1918 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1919 DM dm; 1920 1921 PetscCall(MatGetDM(pc->pmat, &dm)); 1922 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1923 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1924 Vec vcoords; 1925 PetscSection section; 1926 PetscReal *coords; 1927 PetscInt d, cdim, nl, nf, **ctxs; 1928 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1929 /* debug coordinates */ 1930 PetscViewer viewer; 1931 PetscBool flg; 1932 PetscViewerFormat format; 1933 const char *prefix; 1934 1935 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1936 PetscCall(DMGetLocalSection(dm, §ion)); 1937 PetscCall(PetscSectionGetNumFields(section, &nf)); 1938 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1939 PetscCall(VecGetLocalSize(vcoords, &nl)); 1940 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1941 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1942 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1943 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1944 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1945 1946 /* debug coordinates */ 1947 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1948 PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1949 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1950 for (d = 0; d < cdim; d++) { 1951 PetscInt i; 1952 const PetscScalar *v; 1953 char name[16]; 1954 1955 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1956 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d)); 1957 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1958 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1959 if (flg) PetscCall(VecView(vcoords, viewer)); 1960 PetscCall(VecGetArrayRead(vcoords, &v)); 1961 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1962 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1963 } 1964 PetscCall(VecDestroy(&vcoords)); 1965 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1966 PetscCall(PetscFree(coords)); 1967 PetscCall(PetscFree(ctxs[0])); 1968 PetscCall(PetscFree2(funcs, ctxs)); 1969 if (flg) { 1970 PetscCall(PetscViewerPopFormat(viewer)); 1971 PetscCall(PetscViewerDestroy(&viewer)); 1972 } 1973 } 1974 } 1975 PetscFunctionReturn(PETSC_SUCCESS); 1976 } 1977 1978 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1979 { 1980 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1981 IS nis; 1982 const PetscInt *idxs; 1983 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 1984 1985 PetscFunctionBegin; 1986 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 1987 if (mop == MPI_LAND) { 1988 /* init rootdata with true */ 1989 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 1990 } else { 1991 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 1992 } 1993 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 1994 PetscCall(ISGetLocalSize(*is, &nd)); 1995 PetscCall(ISGetIndices(*is, &idxs)); 1996 for (i = 0; i < nd; i++) 1997 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 1998 PetscCall(ISRestoreIndices(*is, &idxs)); 1999 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2000 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2001 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2002 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2003 if (mop == MPI_LAND) { 2004 PetscCall(PetscMalloc1(nd, &nidxs)); 2005 } else { 2006 PetscCall(PetscMalloc1(n, &nidxs)); 2007 } 2008 for (i = 0, nnd = 0; i < n; i++) 2009 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 2010 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 2011 PetscCall(ISDestroy(is)); 2012 *is = nis; 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 2017 { 2018 PC_IS *pcis = (PC_IS *)pc->data; 2019 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2020 2021 PetscFunctionBegin; 2022 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 2023 if (pcbddc->ChangeOfBasisMatrix) { 2024 Vec swap; 2025 2026 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 2027 swap = pcbddc->work_change; 2028 pcbddc->work_change = r; 2029 r = swap; 2030 } 2031 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2032 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2033 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2034 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 2035 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2036 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 2037 PetscCall(VecSet(z, 0.)); 2038 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2039 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2040 if (pcbddc->ChangeOfBasisMatrix) { 2041 pcbddc->work_change = r; 2042 PetscCall(VecCopy(z, pcbddc->work_change)); 2043 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 2044 } 2045 PetscFunctionReturn(PETSC_SUCCESS); 2046 } 2047 2048 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 2049 { 2050 PCBDDCBenignMatMult_ctx ctx; 2051 PetscBool apply_right, apply_left, reset_x; 2052 2053 PetscFunctionBegin; 2054 PetscCall(MatShellGetContext(A, &ctx)); 2055 if (transpose) { 2056 apply_right = ctx->apply_left; 2057 apply_left = ctx->apply_right; 2058 } else { 2059 apply_right = ctx->apply_right; 2060 apply_left = ctx->apply_left; 2061 } 2062 reset_x = PETSC_FALSE; 2063 if (apply_right) { 2064 const PetscScalar *ax; 2065 PetscInt nl, i; 2066 2067 PetscCall(VecGetLocalSize(x, &nl)); 2068 PetscCall(VecGetArrayRead(x, &ax)); 2069 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 2070 PetscCall(VecRestoreArrayRead(x, &ax)); 2071 for (i = 0; i < ctx->benign_n; i++) { 2072 PetscScalar sum, val; 2073 const PetscInt *idxs; 2074 PetscInt nz, j; 2075 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2076 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2077 sum = 0.; 2078 if (ctx->apply_p0) { 2079 val = ctx->work[idxs[nz - 1]]; 2080 for (j = 0; j < nz - 1; j++) { 2081 sum += ctx->work[idxs[j]]; 2082 ctx->work[idxs[j]] += val; 2083 } 2084 } else { 2085 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 2086 } 2087 ctx->work[idxs[nz - 1]] -= sum; 2088 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2089 } 2090 PetscCall(VecPlaceArray(x, ctx->work)); 2091 reset_x = PETSC_TRUE; 2092 } 2093 if (transpose) { 2094 PetscCall(MatMultTranspose(ctx->A, x, y)); 2095 } else { 2096 PetscCall(MatMult(ctx->A, x, y)); 2097 } 2098 if (reset_x) PetscCall(VecResetArray(x)); 2099 if (apply_left) { 2100 PetscScalar *ay; 2101 PetscInt i; 2102 2103 PetscCall(VecGetArray(y, &ay)); 2104 for (i = 0; i < ctx->benign_n; i++) { 2105 PetscScalar sum, val; 2106 const PetscInt *idxs; 2107 PetscInt nz, j; 2108 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2109 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2110 val = -ay[idxs[nz - 1]]; 2111 if (ctx->apply_p0) { 2112 sum = 0.; 2113 for (j = 0; j < nz - 1; j++) { 2114 sum += ay[idxs[j]]; 2115 ay[idxs[j]] += val; 2116 } 2117 ay[idxs[nz - 1]] += sum; 2118 } else { 2119 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 2120 ay[idxs[nz - 1]] = 0.; 2121 } 2122 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2123 } 2124 PetscCall(VecRestoreArray(y, &ay)); 2125 } 2126 PetscFunctionReturn(PETSC_SUCCESS); 2127 } 2128 2129 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2130 { 2131 PetscFunctionBegin; 2132 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 2133 PetscFunctionReturn(PETSC_SUCCESS); 2134 } 2135 2136 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2137 { 2138 PetscFunctionBegin; 2139 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 2140 PetscFunctionReturn(PETSC_SUCCESS); 2141 } 2142 2143 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2144 { 2145 PC_IS *pcis = (PC_IS *)pc->data; 2146 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2147 PCBDDCBenignMatMult_ctx ctx; 2148 2149 PetscFunctionBegin; 2150 if (!restore) { 2151 Mat A_IB, A_BI; 2152 PetscScalar *work; 2153 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2154 2155 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 2156 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 2157 PetscCall(PetscMalloc1(pcis->n, &work)); 2158 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 2159 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 2160 PetscCall(MatSetType(A_IB, MATSHELL)); 2161 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 2162 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2163 PetscCall(PetscNew(&ctx)); 2164 PetscCall(MatShellSetContext(A_IB, ctx)); 2165 ctx->apply_left = PETSC_TRUE; 2166 ctx->apply_right = PETSC_FALSE; 2167 ctx->apply_p0 = PETSC_FALSE; 2168 ctx->benign_n = pcbddc->benign_n; 2169 if (reuse) { 2170 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2171 ctx->free = PETSC_FALSE; 2172 } else { /* TODO: could be optimized for successive solves */ 2173 ISLocalToGlobalMapping N_to_D; 2174 PetscInt i; 2175 2176 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 2177 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 2178 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 2179 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2180 ctx->free = PETSC_TRUE; 2181 } 2182 ctx->A = pcis->A_IB; 2183 ctx->work = work; 2184 PetscCall(MatSetUp(A_IB)); 2185 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2186 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2187 pcis->A_IB = A_IB; 2188 2189 /* A_BI as A_IB^T */ 2190 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2191 pcbddc->benign_original_mat = pcis->A_BI; 2192 pcis->A_BI = A_BI; 2193 } else { 2194 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2195 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2196 PetscCall(MatDestroy(&pcis->A_IB)); 2197 pcis->A_IB = ctx->A; 2198 ctx->A = NULL; 2199 PetscCall(MatDestroy(&pcis->A_BI)); 2200 pcis->A_BI = pcbddc->benign_original_mat; 2201 pcbddc->benign_original_mat = NULL; 2202 if (ctx->free) { 2203 PetscInt i; 2204 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2205 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2206 } 2207 PetscCall(PetscFree(ctx->work)); 2208 PetscCall(PetscFree(ctx)); 2209 } 2210 PetscFunctionReturn(PETSC_SUCCESS); 2211 } 2212 2213 /* used just in bddc debug mode */ 2214 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2215 { 2216 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2217 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2218 Mat An; 2219 2220 PetscFunctionBegin; 2221 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2222 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2223 if (is1) { 2224 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2225 PetscCall(MatDestroy(&An)); 2226 } else { 2227 *B = An; 2228 } 2229 PetscFunctionReturn(PETSC_SUCCESS); 2230 } 2231 2232 /* TODO: add reuse flag */ 2233 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2234 { 2235 Mat Bt; 2236 PetscScalar *a, *bdata; 2237 const PetscInt *ii, *ij; 2238 PetscInt m, n, i, nnz, *bii, *bij; 2239 PetscBool flg_row; 2240 2241 PetscFunctionBegin; 2242 PetscCall(MatGetSize(A, &n, &m)); 2243 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2244 PetscCall(MatSeqAIJGetArray(A, &a)); 2245 nnz = n; 2246 for (i = 0; i < ii[n]; i++) { 2247 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2248 } 2249 PetscCall(PetscMalloc1(n + 1, &bii)); 2250 PetscCall(PetscMalloc1(nnz, &bij)); 2251 PetscCall(PetscMalloc1(nnz, &bdata)); 2252 nnz = 0; 2253 bii[0] = 0; 2254 for (i = 0; i < n; i++) { 2255 PetscInt j; 2256 for (j = ii[i]; j < ii[i + 1]; j++) { 2257 PetscScalar entry = a[j]; 2258 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2259 bij[nnz] = ij[j]; 2260 bdata[nnz] = entry; 2261 nnz++; 2262 } 2263 } 2264 bii[i + 1] = nnz; 2265 } 2266 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2267 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2268 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2269 { 2270 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data; 2271 b->free_a = PETSC_TRUE; 2272 b->free_ij = PETSC_TRUE; 2273 } 2274 if (*B == A) PetscCall(MatDestroy(&A)); 2275 *B = Bt; 2276 PetscFunctionReturn(PETSC_SUCCESS); 2277 } 2278 2279 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2280 { 2281 Mat B = NULL; 2282 DM dm; 2283 IS is_dummy, *cc_n; 2284 ISLocalToGlobalMapping l2gmap_dummy; 2285 PCBDDCGraph graph; 2286 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2287 PetscInt i, n; 2288 PetscInt *xadj, *adjncy; 2289 PetscBool isplex = PETSC_FALSE; 2290 2291 PetscFunctionBegin; 2292 if (ncc) *ncc = 0; 2293 if (cc) *cc = NULL; 2294 if (primalv) *primalv = NULL; 2295 PetscCall(PCBDDCGraphCreate(&graph)); 2296 PetscCall(MatGetDM(pc->pmat, &dm)); 2297 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2298 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, "")); 2299 if (filter) isplex = PETSC_FALSE; 2300 2301 if (isplex) { /* this code has been modified from plexpartition.c */ 2302 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2303 PetscInt *adj = NULL; 2304 IS cellNumbering; 2305 const PetscInt *cellNum; 2306 PetscBool useCone, useClosure; 2307 PetscSection section; 2308 PetscSegBuffer adjBuffer; 2309 PetscSF sfPoint; 2310 2311 PetscCall(DMConvert(dm, DMPLEX, &dm)); 2312 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2313 PetscCall(DMGetPointSF(dm, &sfPoint)); 2314 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2315 /* Build adjacency graph via a section/segbuffer */ 2316 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2317 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2318 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2319 /* Always use FVM adjacency to create partitioner graph */ 2320 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2321 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2322 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2323 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2324 for (n = 0, p = pStart; p < pEnd; p++) { 2325 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2326 if (nroots > 0) { 2327 if (cellNum[p] < 0) continue; 2328 } 2329 adjSize = PETSC_DETERMINE; 2330 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2331 for (a = 0; a < adjSize; ++a) { 2332 const PetscInt point = adj[a]; 2333 if (pStart <= point && point < pEnd) { 2334 PetscInt *PETSC_RESTRICT pBuf; 2335 PetscCall(PetscSectionAddDof(section, p, 1)); 2336 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2337 *pBuf = point; 2338 } 2339 } 2340 n++; 2341 } 2342 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2343 /* Derive CSR graph from section/segbuffer */ 2344 PetscCall(PetscSectionSetUp(section)); 2345 PetscCall(PetscSectionGetStorageSize(section, &size)); 2346 PetscCall(PetscMalloc1(n + 1, &xadj)); 2347 for (idx = 0, p = pStart; p < pEnd; p++) { 2348 if (nroots > 0) { 2349 if (cellNum[p] < 0) continue; 2350 } 2351 PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++])); 2352 } 2353 xadj[n] = size; 2354 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2355 /* Clean up */ 2356 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2357 PetscCall(PetscSectionDestroy(§ion)); 2358 PetscCall(PetscFree(adj)); 2359 graph->xadj = xadj; 2360 graph->adjncy = adjncy; 2361 } else { 2362 Mat A; 2363 PetscBool isseqaij, flg_row; 2364 2365 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2366 if (!A->rmap->N || !A->cmap->N) { 2367 PetscCall(PCBDDCGraphDestroy(&graph)); 2368 PetscFunctionReturn(PETSC_SUCCESS); 2369 } 2370 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2371 if (!isseqaij && filter) { 2372 PetscBool isseqdense; 2373 2374 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2375 if (!isseqdense) { 2376 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2377 } else { /* TODO: rectangular case and LDA */ 2378 PetscScalar *array; 2379 PetscReal chop = 1.e-6; 2380 2381 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2382 PetscCall(MatDenseGetArray(B, &array)); 2383 PetscCall(MatGetSize(B, &n, NULL)); 2384 for (i = 0; i < n; i++) { 2385 PetscInt j; 2386 for (j = i + 1; j < n; j++) { 2387 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2388 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2389 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2390 } 2391 } 2392 PetscCall(MatDenseRestoreArray(B, &array)); 2393 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2394 } 2395 } else { 2396 PetscCall(PetscObjectReference((PetscObject)A)); 2397 B = A; 2398 } 2399 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2400 2401 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2402 if (filter) { 2403 PetscScalar *data; 2404 PetscInt j, cum; 2405 2406 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2407 PetscCall(MatSeqAIJGetArray(B, &data)); 2408 cum = 0; 2409 for (i = 0; i < n; i++) { 2410 PetscInt t; 2411 2412 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2413 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2414 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2415 } 2416 t = xadj_filtered[i]; 2417 xadj_filtered[i] = cum; 2418 cum += t; 2419 } 2420 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2421 graph->xadj = xadj_filtered; 2422 graph->adjncy = adjncy_filtered; 2423 } else { 2424 graph->xadj = xadj; 2425 graph->adjncy = adjncy; 2426 } 2427 } 2428 /* compute local connected components using PCBDDCGraph */ 2429 graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */ 2430 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2431 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2432 PetscCall(ISDestroy(&is_dummy)); 2433 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX)); 2434 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2435 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2436 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2437 2438 /* partial clean up */ 2439 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2440 if (B) { 2441 PetscBool flg_row; 2442 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2443 PetscCall(MatDestroy(&B)); 2444 } 2445 if (isplex) { 2446 PetscCall(PetscFree(xadj)); 2447 PetscCall(PetscFree(adjncy)); 2448 } 2449 2450 /* get back data */ 2451 if (isplex) { 2452 if (ncc) *ncc = graph->ncc; 2453 if (cc || primalv) { 2454 Mat A; 2455 PetscBT btv, btvt, btvc; 2456 PetscSection subSection; 2457 PetscInt *ids, cum, cump, *cids, *pids; 2458 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd; 2459 2460 PetscCall(DMGetDimension(dm, &dim)); 2461 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2462 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 2463 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd)); 2464 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 2465 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 2466 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2467 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2468 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2469 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2470 PetscCall(PetscBTCreate(pEnd - pStart, &btvc)); 2471 2472 /* First see if we find corners for the subdomains, i.e. a vertex 2473 shared by at least dim subdomain boundary faces. This does not 2474 cover all the possible cases with simplices but it is enough 2475 for tensor cells */ 2476 if (vStart != fStart && dim <= 3) { 2477 for (PetscInt c = cStart; c < cEnd; c++) { 2478 PetscInt nf, cnt = 0, mcnt = dim, *cfaces; 2479 const PetscInt *faces; 2480 2481 PetscCall(DMPlexGetConeSize(dm, c, &nf)); 2482 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces)); 2483 PetscCall(DMPlexGetCone(dm, c, &faces)); 2484 for (PetscInt f = 0; f < nf; f++) { 2485 PetscInt nc, ff; 2486 2487 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc)); 2488 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL)); 2489 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f]; 2490 } 2491 if (cnt >= mcnt) { 2492 PetscInt size, *closure = NULL; 2493 2494 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2495 for (PetscInt k = 0; k < 2 * size; k += 2) { 2496 PetscInt v = closure[k]; 2497 if (v >= vStart && v < vEnd) { 2498 PetscInt vsize, *vclosure = NULL; 2499 2500 cnt = 0; 2501 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2502 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) { 2503 PetscInt f = vclosure[vk]; 2504 if (f >= fStart && f < fEnd) { 2505 PetscInt nc, ff; 2506 PetscBool valid = PETSC_FALSE; 2507 2508 for (PetscInt fk = 0; fk < nf; fk++) 2509 if (f == cfaces[fk]) valid = PETSC_TRUE; 2510 if (!valid) continue; 2511 PetscCall(DMPlexGetSupportSize(dm, f, &nc)); 2512 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL)); 2513 if (nc == 1 && f == ff) cnt++; 2514 } 2515 } 2516 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart)); 2517 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2518 } 2519 } 2520 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2521 } 2522 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces)); 2523 } 2524 } 2525 2526 cids[0] = 0; 2527 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2528 PetscInt j; 2529 2530 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2531 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2532 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2533 2534 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2535 for (k = 0; k < 2 * size; k += 2) { 2536 PetscInt s, pp, p = closure[k], off, dof, cdof; 2537 2538 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2539 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2540 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2541 for (s = 0; s < dof - cdof; s++) { 2542 if (PetscBTLookupSet(btvt, off + s)) continue; 2543 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2544 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2545 else pids[cump++] = off + s; /* cross-vertex */ 2546 } 2547 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2548 if (pp != p) { 2549 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2550 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2551 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2552 for (s = 0; s < dof - cdof; s++) { 2553 if (PetscBTLookupSet(btvt, off + s)) continue; 2554 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2555 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2556 else pids[cump++] = off + s; /* cross-vertex */ 2557 } 2558 } 2559 } 2560 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2561 } 2562 cids[i + 1] = cum; 2563 /* mark dofs as already assigned */ 2564 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2565 } 2566 if (cc) { 2567 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2568 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2569 *cc = cc_n; 2570 } 2571 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2572 PetscCall(PetscFree3(ids, cids, pids)); 2573 PetscCall(PetscBTDestroy(&btv)); 2574 PetscCall(PetscBTDestroy(&btvt)); 2575 PetscCall(PetscBTDestroy(&btvc)); 2576 PetscCall(DMDestroy(&dm)); 2577 } 2578 } else { 2579 if (ncc) *ncc = graph->ncc; 2580 if (cc) { 2581 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2582 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2583 *cc = cc_n; 2584 } 2585 } 2586 /* clean up graph */ 2587 graph->xadj = NULL; 2588 graph->adjncy = NULL; 2589 PetscCall(PCBDDCGraphDestroy(&graph)); 2590 PetscFunctionReturn(PETSC_SUCCESS); 2591 } 2592 2593 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2594 { 2595 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2596 PC_IS *pcis = (PC_IS *)pc->data; 2597 IS dirIS = NULL; 2598 PetscInt i; 2599 2600 PetscFunctionBegin; 2601 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2602 if (zerodiag) { 2603 Mat A; 2604 Vec vec3_N; 2605 PetscScalar *vals; 2606 const PetscInt *idxs; 2607 PetscInt nz, *count; 2608 2609 /* p0 */ 2610 PetscCall(VecSet(pcis->vec1_N, 0.)); 2611 PetscCall(PetscMalloc1(pcis->n, &vals)); 2612 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2613 PetscCall(ISGetIndices(zerodiag, &idxs)); 2614 for (i = 0; i < nz; i++) vals[i] = 1.; 2615 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2616 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2617 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2618 /* v_I */ 2619 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2620 for (i = 0; i < nz; i++) vals[i] = 0.; 2621 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2622 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2623 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2624 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2625 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2626 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2627 if (dirIS) { 2628 PetscInt n; 2629 2630 PetscCall(ISGetLocalSize(dirIS, &n)); 2631 PetscCall(ISGetIndices(dirIS, &idxs)); 2632 for (i = 0; i < n; i++) vals[i] = 0.; 2633 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2634 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2635 } 2636 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2637 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2638 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2639 PetscCall(VecSet(vec3_N, 0.)); 2640 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2641 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2642 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2643 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2644 PetscCall(PetscFree(vals)); 2645 PetscCall(VecDestroy(&vec3_N)); 2646 2647 /* there should not be any pressure dofs lying on the interface */ 2648 PetscCall(PetscCalloc1(pcis->n, &count)); 2649 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2650 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2651 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2652 PetscCall(ISGetIndices(zerodiag, &idxs)); 2653 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2654 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2655 PetscCall(PetscFree(count)); 2656 } 2657 PetscCall(ISDestroy(&dirIS)); 2658 2659 /* check PCBDDCBenignGetOrSetP0 */ 2660 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2661 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2662 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2663 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2664 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2665 for (i = 0; i < pcbddc->benign_n; i++) { 2666 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2667 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2668 } 2669 PetscFunctionReturn(PETSC_SUCCESS); 2670 } 2671 2672 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2673 { 2674 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2675 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2676 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2677 PetscInt nz, n, benign_n, bsp = 1; 2678 PetscInt *interior_dofs, n_interior_dofs, nneu; 2679 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2680 2681 PetscFunctionBegin; 2682 if (reuse) goto project_b0; 2683 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2684 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2685 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2686 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2687 has_null_pressures = PETSC_TRUE; 2688 have_null = PETSC_TRUE; 2689 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2690 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2691 Checks if all the pressure dofs in each subdomain have a zero diagonal 2692 If not, a change of basis on pressures is not needed 2693 since the local Schur complements are already SPD 2694 */ 2695 if (pcbddc->n_ISForDofsLocal) { 2696 IS iP = NULL; 2697 PetscInt p, *pp; 2698 PetscBool flg, blocked = PETSC_FALSE; 2699 2700 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2701 n = pcbddc->n_ISForDofsLocal; 2702 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2703 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2704 PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL)); 2705 PetscOptionsEnd(); 2706 if (!flg) { 2707 n = 1; 2708 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2709 } 2710 2711 bsp = 0; 2712 for (p = 0; p < n; p++) { 2713 PetscInt bs = 1; 2714 2715 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2716 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2717 bsp += bs; 2718 } 2719 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2720 bsp = 0; 2721 for (p = 0; p < n; p++) { 2722 const PetscInt *idxs; 2723 PetscInt b, bs = 1, npl, *bidxs; 2724 2725 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2726 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2727 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2728 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2729 for (b = 0; b < bs; b++) { 2730 PetscInt i; 2731 2732 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2733 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2734 bsp++; 2735 } 2736 PetscCall(PetscFree(bidxs)); 2737 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2738 } 2739 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2740 2741 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2742 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2743 if (iP) { 2744 IS newpressures; 2745 2746 PetscCall(ISDifference(pressures, iP, &newpressures)); 2747 PetscCall(ISDestroy(&pressures)); 2748 pressures = newpressures; 2749 } 2750 PetscCall(ISSorted(pressures, &sorted)); 2751 if (!sorted) PetscCall(ISSort(pressures)); 2752 PetscCall(PetscFree(pp)); 2753 } 2754 2755 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2756 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2757 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2758 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2759 PetscCall(ISSorted(zerodiag, &sorted)); 2760 if (!sorted) PetscCall(ISSort(zerodiag)); 2761 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2762 zerodiag_save = zerodiag; 2763 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2764 if (!nz) { 2765 if (n) have_null = PETSC_FALSE; 2766 has_null_pressures = PETSC_FALSE; 2767 PetscCall(ISDestroy(&zerodiag)); 2768 } 2769 recompute_zerodiag = PETSC_FALSE; 2770 2771 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2772 zerodiag_subs = NULL; 2773 benign_n = 0; 2774 n_interior_dofs = 0; 2775 interior_dofs = NULL; 2776 nneu = 0; 2777 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2778 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2779 if (checkb) { /* need to compute interior nodes */ 2780 PetscInt n, i; 2781 PetscInt *count; 2782 ISLocalToGlobalMapping mapping; 2783 2784 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL)); 2785 PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL)); 2786 PetscCall(PetscMalloc1(n, &interior_dofs)); 2787 for (i = 0; i < n; i++) 2788 if (count[i] < 2) interior_dofs[n_interior_dofs++] = i; 2789 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL)); 2790 } 2791 if (has_null_pressures) { 2792 IS *subs; 2793 PetscInt nsubs, i, j, nl; 2794 const PetscInt *idxs; 2795 PetscScalar *array; 2796 Vec *work; 2797 2798 subs = pcbddc->local_subs; 2799 nsubs = pcbddc->n_local_subs; 2800 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2801 if (checkb) { 2802 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2803 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2804 PetscCall(ISGetIndices(zerodiag, &idxs)); 2805 /* work[0] = 1_p */ 2806 PetscCall(VecSet(work[0], 0.)); 2807 PetscCall(VecGetArray(work[0], &array)); 2808 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2809 PetscCall(VecRestoreArray(work[0], &array)); 2810 /* work[0] = 1_v */ 2811 PetscCall(VecSet(work[1], 1.)); 2812 PetscCall(VecGetArray(work[1], &array)); 2813 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2814 PetscCall(VecRestoreArray(work[1], &array)); 2815 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2816 } 2817 2818 if (nsubs > 1 || bsp > 1) { 2819 IS *is; 2820 PetscInt b, totb; 2821 2822 totb = bsp; 2823 is = bsp > 1 ? bzerodiag : &zerodiag; 2824 nsubs = PetscMax(nsubs, 1); 2825 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2826 for (b = 0; b < totb; b++) { 2827 for (i = 0; i < nsubs; i++) { 2828 ISLocalToGlobalMapping l2g; 2829 IS t_zerodiag_subs; 2830 PetscInt nl; 2831 2832 if (subs) { 2833 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2834 } else { 2835 IS tis; 2836 2837 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2838 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2839 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2840 PetscCall(ISDestroy(&tis)); 2841 } 2842 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2843 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2844 if (nl) { 2845 PetscBool valid = PETSC_TRUE; 2846 2847 if (checkb) { 2848 PetscCall(VecSet(matis->x, 0)); 2849 PetscCall(ISGetLocalSize(subs[i], &nl)); 2850 PetscCall(ISGetIndices(subs[i], &idxs)); 2851 PetscCall(VecGetArray(matis->x, &array)); 2852 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2853 PetscCall(VecRestoreArray(matis->x, &array)); 2854 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2855 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2856 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2857 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2858 PetscCall(VecGetArray(matis->y, &array)); 2859 for (j = 0; j < n_interior_dofs; j++) { 2860 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2861 valid = PETSC_FALSE; 2862 break; 2863 } 2864 } 2865 PetscCall(VecRestoreArray(matis->y, &array)); 2866 } 2867 if (valid && nneu) { 2868 const PetscInt *idxs; 2869 PetscInt nzb; 2870 2871 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2872 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2873 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2874 if (nzb) valid = PETSC_FALSE; 2875 } 2876 if (valid && pressures) { 2877 IS t_pressure_subs, tmp; 2878 PetscInt i1, i2; 2879 2880 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2881 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2882 PetscCall(ISGetLocalSize(tmp, &i1)); 2883 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2884 if (i2 != i1) valid = PETSC_FALSE; 2885 PetscCall(ISDestroy(&t_pressure_subs)); 2886 PetscCall(ISDestroy(&tmp)); 2887 } 2888 if (valid) { 2889 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2890 benign_n++; 2891 } else recompute_zerodiag = PETSC_TRUE; 2892 } 2893 PetscCall(ISDestroy(&t_zerodiag_subs)); 2894 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2895 } 2896 } 2897 } else { /* there's just one subdomain (or zero if they have not been detected */ 2898 PetscBool valid = PETSC_TRUE; 2899 2900 if (nneu) valid = PETSC_FALSE; 2901 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 2902 if (valid && checkb) { 2903 PetscCall(MatMult(matis->A, work[0], matis->x)); 2904 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2905 PetscCall(VecGetArray(matis->x, &array)); 2906 for (j = 0; j < n_interior_dofs; j++) { 2907 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2908 valid = PETSC_FALSE; 2909 break; 2910 } 2911 } 2912 PetscCall(VecRestoreArray(matis->x, &array)); 2913 } 2914 if (valid) { 2915 benign_n = 1; 2916 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2917 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2918 zerodiag_subs[0] = zerodiag; 2919 } 2920 } 2921 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 2922 } 2923 PetscCall(PetscFree(interior_dofs)); 2924 2925 if (!benign_n) { 2926 PetscInt n; 2927 2928 PetscCall(ISDestroy(&zerodiag)); 2929 recompute_zerodiag = PETSC_FALSE; 2930 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2931 if (n) have_null = PETSC_FALSE; 2932 } 2933 2934 /* final check for null pressures */ 2935 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 2936 2937 if (recompute_zerodiag) { 2938 PetscCall(ISDestroy(&zerodiag)); 2939 if (benign_n == 1) { 2940 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2941 zerodiag = zerodiag_subs[0]; 2942 } else { 2943 PetscInt i, nzn, *new_idxs; 2944 2945 nzn = 0; 2946 for (i = 0; i < benign_n; i++) { 2947 PetscInt ns; 2948 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2949 nzn += ns; 2950 } 2951 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2952 nzn = 0; 2953 for (i = 0; i < benign_n; i++) { 2954 PetscInt ns, *idxs; 2955 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2956 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2957 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2958 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2959 nzn += ns; 2960 } 2961 PetscCall(PetscSortInt(nzn, new_idxs)); 2962 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2963 } 2964 have_null = PETSC_FALSE; 2965 } 2966 2967 /* determines if the coarse solver will be singular or not */ 2968 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2969 2970 /* Prepare matrix to compute no-net-flux */ 2971 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2972 Mat A, loc_divudotp; 2973 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2974 IS row, col, isused = NULL; 2975 PetscInt M, N, n, st, n_isused; 2976 2977 if (pressures) { 2978 isused = pressures; 2979 } else { 2980 isused = zerodiag_save; 2981 } 2982 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 2983 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2984 PetscCall(MatGetLocalSize(A, &n, NULL)); 2985 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 2986 n_isused = 0; 2987 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 2988 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 2989 st = st - n_isused; 2990 if (n) { 2991 const PetscInt *gidxs; 2992 2993 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 2994 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 2995 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2996 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2997 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 2998 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 2999 } else { 3000 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 3001 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3002 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 3003 } 3004 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 3005 PetscCall(ISGetSize(row, &M)); 3006 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 3007 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 3008 PetscCall(ISDestroy(&row)); 3009 PetscCall(ISDestroy(&col)); 3010 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 3011 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 3012 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3013 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 3014 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 3015 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 3016 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 3017 PetscCall(MatDestroy(&loc_divudotp)); 3018 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3019 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3020 } 3021 PetscCall(ISDestroy(&zerodiag_save)); 3022 PetscCall(ISDestroy(&pressures)); 3023 if (bzerodiag) { 3024 PetscInt i; 3025 3026 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 3027 PetscCall(PetscFree(bzerodiag)); 3028 } 3029 pcbddc->benign_n = benign_n; 3030 pcbddc->benign_zerodiag_subs = zerodiag_subs; 3031 3032 /* determines if the problem has subdomains with 0 pressure block */ 3033 have_null = (PetscBool)(!!pcbddc->benign_n); 3034 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 3035 3036 project_b0: 3037 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 3038 /* change of basis and p0 dofs */ 3039 if (pcbddc->benign_n) { 3040 PetscInt i, s, *nnz; 3041 3042 /* local change of basis for pressures */ 3043 PetscCall(MatDestroy(&pcbddc->benign_change)); 3044 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 3045 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 3046 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 3047 PetscCall(PetscMalloc1(n, &nnz)); 3048 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 3049 for (i = 0; i < pcbddc->benign_n; i++) { 3050 const PetscInt *idxs; 3051 PetscInt nzs, j; 3052 3053 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 3054 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3055 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 3056 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 3057 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3058 } 3059 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 3060 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3061 PetscCall(PetscFree(nnz)); 3062 /* set identity by default */ 3063 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 3064 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3065 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 3066 /* set change on pressures */ 3067 for (s = 0; s < pcbddc->benign_n; s++) { 3068 PetscScalar *array; 3069 const PetscInt *idxs; 3070 PetscInt nzs; 3071 3072 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 3073 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3074 for (i = 0; i < nzs - 1; i++) { 3075 PetscScalar vals[2]; 3076 PetscInt cols[2]; 3077 3078 cols[0] = idxs[i]; 3079 cols[1] = idxs[nzs - 1]; 3080 vals[0] = 1.; 3081 vals[1] = 1.; 3082 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 3083 } 3084 PetscCall(PetscMalloc1(nzs, &array)); 3085 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 3086 array[nzs - 1] = 1.; 3087 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 3088 /* store local idxs for p0 */ 3089 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 3090 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3091 PetscCall(PetscFree(array)); 3092 } 3093 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3094 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3095 3096 /* project if needed */ 3097 if (pcbddc->benign_change_explicit) { 3098 Mat M; 3099 3100 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 3101 PetscCall(MatDestroy(&pcbddc->local_mat)); 3102 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 3103 PetscCall(MatDestroy(&M)); 3104 } 3105 /* store global idxs for p0 */ 3106 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 3107 } 3108 *zerodiaglocal = zerodiag; 3109 PetscFunctionReturn(PETSC_SUCCESS); 3110 } 3111 3112 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3113 { 3114 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3115 PetscScalar *array; 3116 3117 PetscFunctionBegin; 3118 if (!pcbddc->benign_sf) { 3119 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 3120 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 3121 } 3122 if (get) { 3123 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 3124 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3125 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3126 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 3127 } else { 3128 PetscCall(VecGetArray(v, &array)); 3129 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3130 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3131 PetscCall(VecRestoreArray(v, &array)); 3132 } 3133 PetscFunctionReturn(PETSC_SUCCESS); 3134 } 3135 3136 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3137 { 3138 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3139 3140 PetscFunctionBegin; 3141 /* TODO: add error checking 3142 - avoid nested pop (or push) calls. 3143 - cannot push before pop. 3144 - cannot call this if pcbddc->local_mat is NULL 3145 */ 3146 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 3147 if (pop) { 3148 if (pcbddc->benign_change_explicit) { 3149 IS is_p0; 3150 MatReuse reuse; 3151 3152 /* extract B_0 */ 3153 reuse = MAT_INITIAL_MATRIX; 3154 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 3155 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 3156 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 3157 /* remove rows and cols from local problem */ 3158 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 3159 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 3160 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 3161 PetscCall(ISDestroy(&is_p0)); 3162 } else { 3163 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 3164 PetscScalar *vals; 3165 PetscInt i, n, *idxs_ins; 3166 3167 PetscCall(VecGetLocalSize(matis->y, &n)); 3168 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 3169 if (!pcbddc->benign_B0) { 3170 PetscInt *nnz; 3171 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 3172 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 3173 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 3174 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 3175 for (i = 0; i < pcbddc->benign_n; i++) { 3176 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 3177 nnz[i] = n - nnz[i]; 3178 } 3179 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 3180 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3181 PetscCall(PetscFree(nnz)); 3182 } 3183 3184 for (i = 0; i < pcbddc->benign_n; i++) { 3185 PetscScalar *array; 3186 PetscInt *idxs, j, nz, cum; 3187 3188 PetscCall(VecSet(matis->x, 0.)); 3189 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 3190 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3191 for (j = 0; j < nz; j++) vals[j] = 1.; 3192 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 3193 PetscCall(VecAssemblyBegin(matis->x)); 3194 PetscCall(VecAssemblyEnd(matis->x)); 3195 PetscCall(VecSet(matis->y, 0.)); 3196 PetscCall(MatMult(matis->A, matis->x, matis->y)); 3197 PetscCall(VecGetArray(matis->y, &array)); 3198 cum = 0; 3199 for (j = 0; j < n; j++) { 3200 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3201 vals[cum] = array[j]; 3202 idxs_ins[cum] = j; 3203 cum++; 3204 } 3205 } 3206 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 3207 PetscCall(VecRestoreArray(matis->y, &array)); 3208 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3209 } 3210 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3211 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3212 PetscCall(PetscFree2(idxs_ins, vals)); 3213 } 3214 } else { /* push */ 3215 3216 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 3217 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 3218 PetscScalar *B0_vals; 3219 PetscInt *B0_cols, B0_ncol; 3220 3221 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3222 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 3223 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 3224 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 3225 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3226 } 3227 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3228 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3229 } 3230 PetscFunctionReturn(PETSC_SUCCESS); 3231 } 3232 3233 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3234 { 3235 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3236 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3237 PetscBLASInt B_neigs, B_ierr, B_lwork; 3238 PetscBLASInt *B_iwork, *B_ifail; 3239 PetscScalar *work, lwork; 3240 PetscScalar *St, *S, *eigv; 3241 PetscScalar *Sarray, *Starray; 3242 PetscReal *eigs, thresh, lthresh, uthresh; 3243 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3244 PetscBool allocated_S_St, upart; 3245 #if defined(PETSC_USE_COMPLEX) 3246 PetscReal *rwork; 3247 #endif 3248 3249 PetscFunctionBegin; 3250 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3251 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3252 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3253 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3254 sub_schurs->is_posdef); 3255 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3256 3257 if (pcbddc->dbg_flag) { 3258 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3259 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3260 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3261 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3262 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3263 } 3264 3265 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3266 3267 /* max size of subsets */ 3268 mss = 0; 3269 for (i = 0; i < sub_schurs->n_subs; i++) { 3270 PetscInt subset_size; 3271 3272 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3273 mss = PetscMax(mss, subset_size); 3274 } 3275 3276 /* min/max and threshold */ 3277 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3278 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3279 nmax = PetscMax(nmin, nmax); 3280 allocated_S_St = PETSC_FALSE; 3281 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3282 allocated_S_St = PETSC_TRUE; 3283 } 3284 3285 /* allocate lapack workspace */ 3286 cum = cum2 = 0; 3287 maxneigs = 0; 3288 for (i = 0; i < sub_schurs->n_subs; i++) { 3289 PetscInt n, subset_size; 3290 3291 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3292 n = PetscMin(subset_size, nmax); 3293 cum += subset_size; 3294 cum2 += subset_size * n; 3295 maxneigs = PetscMax(maxneigs, n); 3296 } 3297 lwork = 0; 3298 if (mss) { 3299 PetscScalar sdummy = 0.; 3300 PetscBLASInt B_itype = 1; 3301 PetscBLASInt B_N, idummy = 0; 3302 PetscReal rdummy = 0., zero = 0.0; 3303 PetscReal eps = 0.0; /* dlamch? */ 3304 3305 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3306 PetscCall(PetscBLASIntCast(mss, &B_N)); 3307 B_lwork = -1; 3308 /* some implementations may complain about NULL pointers, even if we are querying */ 3309 S = &sdummy; 3310 St = &sdummy; 3311 eigs = &rdummy; 3312 eigv = &sdummy; 3313 B_iwork = &idummy; 3314 B_ifail = &idummy; 3315 #if defined(PETSC_USE_COMPLEX) 3316 rwork = &rdummy; 3317 #endif 3318 thresh = 1.0; 3319 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3320 #if defined(PETSC_USE_COMPLEX) 3321 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3322 #else 3323 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3324 #endif 3325 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr); 3326 PetscCall(PetscFPTrapPop()); 3327 } 3328 3329 nv = 0; 3330 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3331 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3332 } 3333 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3334 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3335 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3336 #if defined(PETSC_USE_COMPLEX) 3337 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3338 #endif 3339 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3340 &pcbddc->adaptive_constraints_data)); 3341 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3342 3343 maxneigs = 0; 3344 cum = cumarray = 0; 3345 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3346 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3347 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3348 const PetscInt *idxs; 3349 3350 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3351 for (cum = 0; cum < nv; cum++) { 3352 pcbddc->adaptive_constraints_n[cum] = 1; 3353 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3354 pcbddc->adaptive_constraints_data[cum] = 1.0; 3355 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3356 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3357 } 3358 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3359 } 3360 3361 if (mss) { /* multilevel */ 3362 if (sub_schurs->gdsw) { 3363 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3364 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3365 } else { 3366 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3367 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3368 } 3369 } 3370 3371 lthresh = pcbddc->adaptive_threshold[0]; 3372 uthresh = pcbddc->adaptive_threshold[1]; 3373 upart = pcbddc->use_deluxe_scaling; 3374 for (i = 0; i < sub_schurs->n_subs; i++) { 3375 const PetscInt *idxs; 3376 PetscReal upper, lower; 3377 PetscInt j, subset_size, eigs_start = 0; 3378 PetscBLASInt B_N; 3379 PetscBool same_data = PETSC_FALSE; 3380 PetscBool scal = PETSC_FALSE; 3381 3382 if (upart) { 3383 upper = PETSC_MAX_REAL; 3384 lower = uthresh; 3385 } else { 3386 if (sub_schurs->gdsw) { 3387 upper = uthresh; 3388 lower = PETSC_MIN_REAL; 3389 } else { 3390 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3391 upper = 1. / uthresh; 3392 lower = 0.; 3393 } 3394 } 3395 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3396 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3397 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3398 /* this is experimental: we assume the dofs have been properly grouped to have 3399 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3400 if (!sub_schurs->is_posdef) { 3401 Mat T; 3402 3403 for (j = 0; j < subset_size; j++) { 3404 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3405 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3406 PetscCall(MatScale(T, -1.0)); 3407 PetscCall(MatDestroy(&T)); 3408 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3409 PetscCall(MatScale(T, -1.0)); 3410 PetscCall(MatDestroy(&T)); 3411 if (sub_schurs->change_primal_sub) { 3412 PetscInt nz, k; 3413 const PetscInt *idxs; 3414 3415 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3416 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3417 for (k = 0; k < nz; k++) { 3418 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3419 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3420 } 3421 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3422 } 3423 scal = PETSC_TRUE; 3424 break; 3425 } 3426 } 3427 } 3428 3429 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3430 if (sub_schurs->is_symmetric) { 3431 PetscInt j, k; 3432 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3433 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3434 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3435 } 3436 for (j = 0; j < subset_size; j++) { 3437 for (k = j; k < subset_size; k++) { 3438 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3439 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3440 } 3441 } 3442 } else { 3443 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3444 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3445 } 3446 } else { 3447 S = Sarray + cumarray; 3448 St = Starray + cumarray; 3449 } 3450 /* see if we can save some work */ 3451 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3452 3453 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3454 B_neigs = 0; 3455 } else { 3456 PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0; 3457 PetscReal eps = -1.0; /* dlamch? */ 3458 PetscInt nmin_s; 3459 PetscBool compute_range; 3460 3461 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3462 B_neigs = 0; 3463 compute_range = (PetscBool)!same_data; 3464 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3465 3466 if (pcbddc->dbg_flag) { 3467 PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof; 3468 3469 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3470 PetscCall( 3471 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc)); 3472 } 3473 3474 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3475 if (compute_range) { 3476 /* ask for eigenvalues larger than thresh */ 3477 if (sub_schurs->is_posdef) { 3478 #if defined(PETSC_USE_COMPLEX) 3479 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3480 #else 3481 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3482 #endif 3483 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3484 } else { /* no theory so far, but it works nicely */ 3485 PetscInt recipe = 0, recipe_m = 1; 3486 PetscReal bb[2]; 3487 3488 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3489 switch (recipe) { 3490 case 0: 3491 if (scal) { 3492 bb[0] = PETSC_MIN_REAL; 3493 bb[1] = lthresh; 3494 } else { 3495 bb[0] = uthresh; 3496 bb[1] = PETSC_MAX_REAL; 3497 } 3498 #if defined(PETSC_USE_COMPLEX) 3499 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3500 #else 3501 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3502 #endif 3503 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3504 break; 3505 case 1: 3506 bb[0] = PETSC_MIN_REAL; 3507 bb[1] = lthresh * lthresh; 3508 #if defined(PETSC_USE_COMPLEX) 3509 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3510 #else 3511 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3512 #endif 3513 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3514 if (!scal) { 3515 PetscBLASInt B_neigs2 = 0; 3516 3517 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3518 bb[1] = PETSC_MAX_REAL; 3519 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3520 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3521 #if defined(PETSC_USE_COMPLEX) 3522 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3523 #else 3524 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3525 #endif 3526 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3527 B_neigs += B_neigs2; 3528 } 3529 break; 3530 case 2: 3531 if (scal) { 3532 bb[0] = PETSC_MIN_REAL; 3533 bb[1] = 0; 3534 #if defined(PETSC_USE_COMPLEX) 3535 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3536 #else 3537 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3538 #endif 3539 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3540 } else { 3541 PetscBLASInt B_neigs2 = 0; 3542 PetscBool do_copy = PETSC_FALSE; 3543 3544 lthresh = PetscMax(lthresh, 0.0); 3545 if (lthresh > 0.0) { 3546 bb[0] = PETSC_MIN_REAL; 3547 bb[1] = lthresh * lthresh; 3548 3549 do_copy = PETSC_TRUE; 3550 #if defined(PETSC_USE_COMPLEX) 3551 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3552 #else 3553 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3554 #endif 3555 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3556 } 3557 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3558 bb[1] = PETSC_MAX_REAL; 3559 if (do_copy) { 3560 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3561 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3562 } 3563 #if defined(PETSC_USE_COMPLEX) 3564 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3565 #else 3566 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3567 #endif 3568 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3569 B_neigs += B_neigs2; 3570 } 3571 break; 3572 case 3: 3573 if (scal) { 3574 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3575 } else { 3576 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3577 } 3578 if (!scal) { 3579 bb[0] = uthresh; 3580 bb[1] = PETSC_MAX_REAL; 3581 #if defined(PETSC_USE_COMPLEX) 3582 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3583 #else 3584 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3585 #endif 3586 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3587 } 3588 if (recipe_m > 0 && B_N - B_neigs > 0) { 3589 PetscBLASInt B_neigs2 = 0; 3590 3591 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3592 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3593 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3594 #if defined(PETSC_USE_COMPLEX) 3595 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3596 #else 3597 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3598 #endif 3599 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3600 B_neigs += B_neigs2; 3601 } 3602 break; 3603 case 4: 3604 bb[0] = PETSC_MIN_REAL; 3605 bb[1] = lthresh; 3606 #if defined(PETSC_USE_COMPLEX) 3607 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3608 #else 3609 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3610 #endif 3611 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3612 { 3613 PetscBLASInt B_neigs2 = 0; 3614 3615 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3616 bb[1] = PETSC_MAX_REAL; 3617 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3618 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3619 #if defined(PETSC_USE_COMPLEX) 3620 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3621 #else 3622 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3623 #endif 3624 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3625 B_neigs += B_neigs2; 3626 } 3627 break; 3628 case 5: /* same as before: first compute all eigenvalues, then filter */ 3629 #if defined(PETSC_USE_COMPLEX) 3630 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3631 #else 3632 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3633 #endif 3634 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3635 { 3636 PetscInt e, k, ne; 3637 for (e = 0, ne = 0; e < B_neigs; e++) { 3638 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3639 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3640 eigs[ne] = eigs[e]; 3641 ne++; 3642 } 3643 } 3644 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3645 PetscCall(PetscBLASIntCast(ne, &B_neigs)); 3646 } 3647 break; 3648 default: 3649 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3650 } 3651 } 3652 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3653 PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU)); 3654 #if defined(PETSC_USE_COMPLEX) 3655 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3656 #else 3657 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3658 #endif 3659 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3660 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3661 PetscInt k; 3662 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3663 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3664 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3665 nmin = nmax; 3666 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3667 for (k = 0; k < nmax; k++) { 3668 eigs[k] = 1. / PETSC_SMALL; 3669 eigv[k * (subset_size + 1)] = 1.0; 3670 } 3671 } 3672 PetscCall(PetscFPTrapPop()); 3673 if (B_ierr) { 3674 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3675 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3676 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3677 } 3678 3679 if (B_neigs > nmax) { 3680 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3681 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3682 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3683 } 3684 3685 nmin_s = PetscMin(nmin, B_N); 3686 if (B_neigs < nmin_s) { 3687 PetscBLASInt B_neigs2 = 0; 3688 3689 if (upart) { 3690 if (scal) { 3691 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3692 B_IL = B_neigs + 1; 3693 } else { 3694 PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL)); 3695 B_IU = B_N - B_neigs; 3696 } 3697 } else { 3698 B_IL = B_neigs + 1; 3699 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3700 } 3701 if (pcbddc->dbg_flag) { 3702 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3703 } 3704 if (sub_schurs->is_symmetric) { 3705 PetscInt j, k; 3706 for (j = 0; j < subset_size; j++) { 3707 for (k = j; k < subset_size; k++) { 3708 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3709 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3710 } 3711 } 3712 } else { 3713 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3714 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3715 } 3716 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3717 #if defined(PETSC_USE_COMPLEX) 3718 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3719 #else 3720 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3721 #endif 3722 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3723 PetscCall(PetscFPTrapPop()); 3724 B_neigs += B_neigs2; 3725 } 3726 if (B_ierr) { 3727 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3728 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3729 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3730 } 3731 if (pcbddc->dbg_flag) { 3732 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3733 for (j = 0; j < B_neigs; j++) { 3734 if (!sub_schurs->gdsw) { 3735 if (eigs[j] == 0.0) { 3736 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3737 } else { 3738 if (upart) { 3739 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3740 } else { 3741 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1 / eigs[j + eigs_start]))); 3742 } 3743 } 3744 } else { 3745 double pg = (double)eigs[j + eigs_start]; 3746 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3747 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3748 } 3749 } 3750 } 3751 } 3752 /* change the basis back to the original one */ 3753 if (sub_schurs->change) { 3754 Mat change, phi, phit; 3755 3756 if (pcbddc->dbg_flag > 2) { 3757 PetscInt ii; 3758 for (ii = 0; ii < B_neigs; ii++) { 3759 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3760 for (j = 0; j < B_N; j++) { 3761 #if defined(PETSC_USE_COMPLEX) 3762 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3763 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3764 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3765 #else 3766 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3767 #endif 3768 } 3769 } 3770 } 3771 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3772 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3773 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi)); 3774 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3775 PetscCall(MatDestroy(&phit)); 3776 PetscCall(MatDestroy(&phi)); 3777 } 3778 maxneigs = PetscMax(B_neigs, maxneigs); 3779 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3780 if (B_neigs) { 3781 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3782 3783 if (pcbddc->dbg_flag > 1) { 3784 PetscInt ii; 3785 for (ii = 0; ii < B_neigs; ii++) { 3786 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3787 for (j = 0; j < B_N; j++) { 3788 #if defined(PETSC_USE_COMPLEX) 3789 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3790 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3791 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3792 #else 3793 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3794 #endif 3795 } 3796 } 3797 } 3798 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3799 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3800 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3801 cum++; 3802 } 3803 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3804 /* shift for next computation */ 3805 cumarray += subset_size * subset_size; 3806 } 3807 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3808 3809 if (mss) { 3810 if (sub_schurs->gdsw) { 3811 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3812 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3813 } else { 3814 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3815 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3816 /* destroy matrices (junk) */ 3817 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3818 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3819 } 3820 } 3821 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3822 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3823 #if defined(PETSC_USE_COMPLEX) 3824 PetscCall(PetscFree(rwork)); 3825 #endif 3826 if (pcbddc->dbg_flag) { 3827 PetscInt maxneigs_r; 3828 PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3829 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3830 } 3831 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3832 PetscFunctionReturn(PETSC_SUCCESS); 3833 } 3834 3835 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3836 { 3837 Mat coarse_submat; 3838 3839 PetscFunctionBegin; 3840 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3841 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3842 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3843 3844 /* Setup local neumann solver ksp_R */ 3845 /* PCBDDCSetUpLocalScatters should be called first! */ 3846 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3847 3848 /* 3849 Setup local correction and local part of coarse basis. 3850 Gives back the dense local part of the coarse matrix in column major ordering 3851 */ 3852 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat)); 3853 3854 /* Compute total number of coarse nodes and setup coarse solver */ 3855 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat)); 3856 PetscCall(MatDestroy(&coarse_submat)); 3857 PetscFunctionReturn(PETSC_SUCCESS); 3858 } 3859 3860 PetscErrorCode PCBDDCResetCustomization(PC pc) 3861 { 3862 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3863 3864 PetscFunctionBegin; 3865 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3866 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3867 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3868 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3869 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3870 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3871 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3872 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3873 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3874 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3875 PetscFunctionReturn(PETSC_SUCCESS); 3876 } 3877 3878 PetscErrorCode PCBDDCResetTopography(PC pc) 3879 { 3880 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3881 PetscInt i; 3882 3883 PetscFunctionBegin; 3884 PetscCall(MatDestroy(&pcbddc->nedcG)); 3885 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3886 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3887 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3888 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3889 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3890 PetscCall(VecDestroy(&pcbddc->work_change)); 3891 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3892 PetscCall(MatDestroy(&pcbddc->divudotp)); 3893 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3894 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3895 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3896 pcbddc->n_local_subs = 0; 3897 PetscCall(PetscFree(pcbddc->local_subs)); 3898 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3899 pcbddc->graphanalyzed = PETSC_FALSE; 3900 pcbddc->recompute_topography = PETSC_TRUE; 3901 pcbddc->corner_selected = PETSC_FALSE; 3902 PetscFunctionReturn(PETSC_SUCCESS); 3903 } 3904 3905 PetscErrorCode PCBDDCResetSolvers(PC pc) 3906 { 3907 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3908 3909 PetscFunctionBegin; 3910 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3911 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3912 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3913 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3914 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3915 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3916 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3917 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3918 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3919 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3920 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3921 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3922 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3923 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3924 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3925 PetscCall(KSPReset(pcbddc->ksp_D)); 3926 PetscCall(KSPReset(pcbddc->ksp_R)); 3927 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3928 PetscCall(MatDestroy(&pcbddc->local_mat)); 3929 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3930 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3931 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3932 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3933 PetscCall(MatDestroy(&pcbddc->benign_change)); 3934 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3935 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3936 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3937 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3938 if (pcbddc->benign_zerodiag_subs) { 3939 PetscInt i; 3940 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3941 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3942 } 3943 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3944 PetscFunctionReturn(PETSC_SUCCESS); 3945 } 3946 3947 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3948 { 3949 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3950 PC_IS *pcis = (PC_IS *)pc->data; 3951 VecType impVecType; 3952 PetscInt n_constraints, n_R, old_size; 3953 3954 PetscFunctionBegin; 3955 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3956 n_R = pcis->n - pcbddc->n_vertices; 3957 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3958 /* local work vectors (try to avoid unneeded work)*/ 3959 /* R nodes */ 3960 old_size = -1; 3961 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 3962 if (n_R != old_size) { 3963 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3964 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3965 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3966 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3967 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3968 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3969 } 3970 /* local primal dofs */ 3971 old_size = -1; 3972 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 3973 if (pcbddc->local_primal_size != old_size) { 3974 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3975 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3976 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3977 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 3978 } 3979 /* local explicit constraints */ 3980 old_size = -1; 3981 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 3982 if (n_constraints && n_constraints != old_size) { 3983 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3984 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 3985 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 3986 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 3987 } 3988 PetscFunctionReturn(PETSC_SUCCESS); 3989 } 3990 3991 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode) 3992 { 3993 PetscBool flg; 3994 const PetscScalar *a; 3995 3996 PetscFunctionBegin; 3997 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg)); 3998 if (flg) { 3999 PetscCall(MatDenseGetArrayRead(S, &a)); 4000 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE)); 4001 PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode)); 4002 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE)); 4003 PetscCall(MatDenseRestoreArrayRead(S, &a)); 4004 } else { 4005 const PetscInt *ii, *jj; 4006 PetscInt n; 4007 PetscInt buf[8192], *bufc = NULL; 4008 PetscBool freeb = PETSC_FALSE; 4009 Mat Sm = S; 4010 4011 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg)); 4012 if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm)); 4013 else PetscCall(PetscObjectReference((PetscObject)S)); 4014 PetscCall(MatSeqAIJGetArrayRead(Sm, &a)); 4015 PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4016 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure"); 4017 if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) { 4018 bufc = buf; 4019 } else { 4020 PetscCall(PetscMalloc1(nc, &bufc)); 4021 freeb = PETSC_TRUE; 4022 } 4023 4024 for (PetscInt i = 0; i < n; i++) { 4025 const PetscInt nci = ii[i + 1] - ii[i]; 4026 4027 for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]]; 4028 PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode)); 4029 } 4030 PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4031 PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a)); 4032 PetscCall(MatDestroy(&Sm)); 4033 if (freeb) PetscCall(PetscFree(bufc)); 4034 } 4035 PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY)); 4036 PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY)); 4037 PetscFunctionReturn(PETSC_SUCCESS); 4038 } 4039 4040 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat) 4041 { 4042 Mat_SeqAIJ *aij; 4043 PetscInt *ii, *jj; 4044 PetscScalar *aa; 4045 PetscInt nnz = 0, m, nc; 4046 const PetscScalar *a; 4047 const PetscScalar zero = 0.0; 4048 4049 PetscFunctionBegin; 4050 PetscCall(MatGetLocalSize(D, &m, &nc)); 4051 PetscCall(MatDenseGetArrayRead(D, &a)); 4052 PetscCall(PetscMalloc1(m + 1, &ii)); 4053 PetscCall(PetscMalloc1(m * nc, &jj)); 4054 PetscCall(PetscMalloc1(m * nc, &aa)); 4055 ii[0] = 0; 4056 for (PetscInt k = 0; k < m; k++) { 4057 for (PetscInt s = 0; s < nc; s++) { 4058 const PetscInt c = s + k * nc; 4059 const PetscScalar v = a[k + s * m]; 4060 4061 if (PetscUnlikely(j[c] < 0 || v == zero)) continue; 4062 jj[nnz] = j[c]; 4063 aa[nnz] = a[k + s * m]; 4064 nnz++; 4065 } 4066 ii[k + 1] = nnz; 4067 } 4068 4069 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat)); 4070 PetscCall(MatDenseRestoreArrayRead(D, &a)); 4071 4072 aij = (Mat_SeqAIJ *)(*mat)->data; 4073 aij->free_a = PETSC_TRUE; 4074 aij->free_ij = PETSC_TRUE; 4075 PetscFunctionReturn(PETSC_SUCCESS); 4076 } 4077 4078 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */ 4079 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B) 4080 { 4081 PetscInt n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL; 4082 const PetscBool allowzeropivot = PETSC_FALSE; 4083 PetscBool zeropivotdetected = PETSC_FALSE; 4084 const PetscReal shift = 0.0; 4085 PetscInt ipvt[5], *ii, *jj, *indi, *indj; 4086 PetscScalar work[25], *v_work = NULL, *aa, *diag; 4087 PetscLogDouble flops = 0.0; 4088 4089 PetscFunctionBegin; 4090 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices"); 4091 for (PetscInt i = 0; i < nblocks; i++) { 4092 ncnt += bsizes[i]; 4093 ncnt2 += PetscSqr(bsizes[i]); 4094 } 4095 PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n); 4096 for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]); 4097 if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots)); 4098 4099 PetscCall(PetscMalloc1(n + 1, &ii)); 4100 PetscCall(PetscMalloc1(ncnt2, &jj)); 4101 PetscCall(PetscCalloc1(ncnt2, &aa)); 4102 4103 ncnt = 0; 4104 ii[0] = 0; 4105 indi = ii; 4106 indj = jj; 4107 diag = aa; 4108 for (PetscInt i = 0; i < nblocks; i++) { 4109 const PetscInt bs = bsizes[i]; 4110 4111 for (PetscInt k = 0; k < bs; k++) { 4112 indi[k + 1] = indi[k] + bs; 4113 for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j; 4114 } 4115 PetscCall(MatGetValues(A, bs, indj, bs, indj, diag)); 4116 switch (bs) { 4117 case 1: 4118 *diag = 1.0 / (*diag); 4119 break; 4120 case 2: 4121 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 4122 break; 4123 case 3: 4124 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 4125 break; 4126 case 4: 4127 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 4128 break; 4129 case 5: 4130 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4131 break; 4132 case 6: 4133 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 4134 break; 4135 case 7: 4136 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 4137 break; 4138 default: 4139 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4140 } 4141 ncnt += bs; 4142 flops += 2.0 * PetscPowInt(bs, 3) / 3.0; 4143 diag += bs * bs; 4144 indj += bs * bs; 4145 indi += bs; 4146 } 4147 PetscCall(PetscLogFlops(flops)); 4148 PetscCall(PetscFree2(v_work, v_pivots)); 4149 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B)); 4150 { 4151 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data; 4152 aij->free_a = PETSC_TRUE; 4153 aij->free_ij = PETSC_TRUE; 4154 } 4155 PetscFunctionReturn(PETSC_SUCCESS); 4156 } 4157 4158 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B) 4159 { 4160 const PetscScalar *rarr; 4161 PetscScalar *larr; 4162 PetscSF vsf; 4163 PetscInt n, rld, lld; 4164 4165 PetscFunctionBegin; 4166 PetscCall(MatGetSize(A, NULL, &n)); 4167 PetscCall(MatDenseGetLDA(A, &rld)); 4168 PetscCall(MatDenseGetLDA(B, &lld)); 4169 PetscCall(MatDenseGetArrayRead(A, &rarr)); 4170 PetscCall(MatDenseGetArrayWrite(B, &larr)); 4171 PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf)); 4172 PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4173 PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4174 PetscCall(MatDenseRestoreArrayRead(A, &rarr)); 4175 PetscCall(MatDenseRestoreArrayWrite(B, &larr)); 4176 PetscCall(PetscSFDestroy(&vsf)); 4177 PetscFunctionReturn(PETSC_SUCCESS); 4178 } 4179 4180 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat) 4181 { 4182 PC_IS *pcis = (PC_IS *)pc->data; 4183 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4184 PCBDDCGraph graph = pcbddc->mat_graph; 4185 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4186 /* submatrices of local problem */ 4187 Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL; 4188 /* submatrices of local coarse problem */ 4189 Mat S_CV = NULL, S_VC = NULL, S_CC = NULL; 4190 /* working matrices */ 4191 Mat C_CR; 4192 4193 /* additional working stuff */ 4194 PC pc_R; 4195 IS is_R, is_V, is_C; 4196 const PetscInt *idx_V, *idx_C; 4197 Mat F, Brhs = NULL; 4198 Vec dummy_vec; 4199 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 4200 PetscInt *idx_V_B; 4201 PetscInt lda_rhs, n_vertices, n_constraints, *p0_lidx_I; 4202 PetscInt n_eff_vertices, n_eff_constraints; 4203 PetscInt i, n_R, n_D, n_B; 4204 PetscScalar one = 1.0, m_one = -1.0; 4205 4206 /* Multi-element support */ 4207 PetscBool multi_element = graph->multi_element; 4208 PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL; 4209 PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL; 4210 IS is_C_perm = NULL; 4211 PetscInt n_C_bss = 0, *C_bss = NULL; 4212 Mat coarse_phi_multi; 4213 4214 PetscFunctionBegin; 4215 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 4216 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4217 4218 /* Set Non-overlapping dimensions */ 4219 n_vertices = pcbddc->n_vertices; 4220 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 4221 n_B = pcis->n_B; 4222 n_D = pcis->n - n_B; 4223 n_R = pcis->n - n_vertices; 4224 4225 /* vertices in boundary numbering */ 4226 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 4227 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 4228 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 4229 4230 /* these two cases still need to be optimized */ 4231 if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE; 4232 4233 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 4234 if (multi_element) { 4235 PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 4236 4237 PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat)); 4238 PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size)); 4239 PetscCall(MatSetType(*coarse_submat, MATSEQAIJ)); 4240 PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 4241 PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 4242 4243 /* group vertices and constraints by subdomain id */ 4244 const PetscInt *vidxs = pcbddc->primal_indices_local_idxs; 4245 const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices; 4246 PetscInt *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz; 4247 PetscInt n_el = PetscMax(graph->n_local_subs, 1); 4248 4249 PetscCall(PetscCalloc1(2 * n_el, &count_eff)); 4250 PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V)); 4251 PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C)); 4252 for (PetscInt i = 0; i < n_vertices; i++) { 4253 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4254 4255 V_to_eff_V[i] = count_eff[s]; 4256 count_eff[s] += 1; 4257 } 4258 for (PetscInt i = 0; i < n_constraints; i++) { 4259 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1; 4260 4261 C_to_eff_C[i] = count_eff[s]; 4262 count_eff[s] += 1; 4263 } 4264 4265 /* preallocation */ 4266 PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz)); 4267 for (PetscInt i = 0; i < n_vertices; i++) { 4268 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4269 4270 nnz[i] = count_eff[s] + count_eff[s + 1]; 4271 } 4272 for (PetscInt i = 0; i < n_constraints; i++) { 4273 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub; 4274 4275 nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1]; 4276 } 4277 PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz)); 4278 PetscCall(PetscFree(nnz)); 4279 4280 n_eff_vertices = 0; 4281 n_eff_constraints = 0; 4282 for (PetscInt i = 0; i < n_el; i++) { 4283 n_eff_vertices = PetscMax(n_eff_vertices, count_eff[2 * i]); 4284 n_eff_constraints = PetscMax(n_eff_constraints, count_eff[2 * i + 1]); 4285 count_eff[2 * i] = 0; 4286 count_eff[2 * i + 1] = 0; 4287 } 4288 4289 const PetscInt *idx; 4290 PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C)); 4291 4292 for (PetscInt i = 0; i < n_vertices; i++) { 4293 const PetscInt e = graph->nodes[vidxs[i]].local_sub; 4294 const PetscInt s = 2 * e; 4295 4296 V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i; 4297 count_eff[s] += 1; 4298 } 4299 for (PetscInt i = 0; i < n_constraints; i++) { 4300 const PetscInt e = graph->nodes[cidxs[i]].local_sub; 4301 const PetscInt s = 2 * e + 1; 4302 4303 C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i; 4304 count_eff[s] += 1; 4305 } 4306 4307 PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J)); 4308 PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J)); 4309 PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J)); 4310 PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J)); 4311 for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1; 4312 for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1; 4313 for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1; 4314 for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1; 4315 4316 PetscCall(ISGetIndices(pcbddc->is_R_local, &idx)); 4317 for (PetscInt i = 0; i < n_R; i++) { 4318 const PetscInt e = graph->nodes[idx[i]].local_sub; 4319 const PetscInt s = 2 * e; 4320 PetscInt j; 4321 4322 for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4323 for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4324 } 4325 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx)); 4326 PetscCall(ISGetIndices(pcis->is_B_local, &idx)); 4327 for (PetscInt i = 0; i < n_B; i++) { 4328 const PetscInt e = graph->nodes[idx[i]].local_sub; 4329 const PetscInt s = 2 * e; 4330 PetscInt j; 4331 4332 for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4333 for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4334 } 4335 PetscCall(ISRestoreIndices(pcis->is_B_local, &idx)); 4336 4337 /* permutation and blocksizes for block invert of S_CC */ 4338 PetscInt *idxp; 4339 4340 PetscCall(PetscMalloc1(n_constraints, &idxp)); 4341 PetscCall(PetscMalloc1(n_el, &C_bss)); 4342 n_C_bss = 0; 4343 for (PetscInt e = 0, cnt = 0; e < n_el; e++) { 4344 const PetscInt nc = count_eff[2 * e + 1]; 4345 4346 if (nc) C_bss[n_C_bss++] = nc; 4347 for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; 4348 cnt += nc; 4349 } 4350 4351 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm)); 4352 4353 PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C)); 4354 PetscCall(PetscFree(count_eff)); 4355 } else { 4356 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat)); 4357 n_eff_constraints = n_constraints; 4358 n_eff_vertices = n_vertices; 4359 } 4360 4361 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 4362 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 4363 PetscCall(PCSetUp(pc_R)); 4364 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 4365 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 4366 lda_rhs = n_R; 4367 need_benign_correction = PETSC_FALSE; 4368 if (isLU || isCHOL) { 4369 PetscCall(PCFactorGetMatrix(pc_R, &F)); 4370 } else if (sub_schurs && sub_schurs->reuse_solver) { 4371 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4372 MatFactorType type; 4373 4374 F = reuse_solver->F; 4375 PetscCall(MatGetFactorType(F, &type)); 4376 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 4377 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 4378 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 4379 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 4380 } else F = NULL; 4381 4382 /* determine if we can use a sparse right-hand side */ 4383 sparserhs = PETSC_FALSE; 4384 if (F && !multi_element) { 4385 MatSolverType solver; 4386 4387 PetscCall(MatFactorGetSolverType(F, &solver)); 4388 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 4389 } 4390 4391 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 4392 dummy_vec = NULL; 4393 if (need_benign_correction && lda_rhs != n_R && F) { 4394 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 4395 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 4396 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 4397 } 4398 4399 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4400 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4401 4402 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R)); 4403 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V)); 4404 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C)); 4405 PetscCall(ISGetIndices(is_V, &idx_V)); 4406 PetscCall(ISGetIndices(is_C, &idx_C)); 4407 4408 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4409 if (n_constraints) { 4410 Mat C_B; 4411 4412 /* Extract constraints on R nodes: C_{CR} */ 4413 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 4414 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4415 4416 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4417 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4418 if (!sparserhs) { 4419 PetscScalar *marr; 4420 4421 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs)); 4422 PetscCall(MatDenseGetArrayWrite(Brhs, &marr)); 4423 for (i = 0; i < n_constraints; i++) { 4424 const PetscScalar *row_cmat_values; 4425 const PetscInt *row_cmat_indices; 4426 PetscInt size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i; 4427 4428 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4429 for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j]; 4430 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4431 } 4432 PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr)); 4433 } else { 4434 Mat tC_CR; 4435 4436 PetscCall(MatScale(C_CR, -1.0)); 4437 if (lda_rhs != n_R) { 4438 PetscScalar *aa; 4439 PetscInt r, *ii, *jj; 4440 PetscBool done; 4441 4442 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4443 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4444 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 4445 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 4446 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4447 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4448 } else { 4449 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4450 tC_CR = C_CR; 4451 } 4452 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 4453 PetscCall(MatDestroy(&tC_CR)); 4454 } 4455 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R)); 4456 if (F) { 4457 if (need_benign_correction) { 4458 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4459 4460 /* rhs is already zero on interior dofs, no need to change the rhs */ 4461 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 4462 } 4463 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 4464 if (need_benign_correction) { 4465 PetscScalar *marr; 4466 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4467 4468 /* XXX multi_element? */ 4469 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4470 if (lda_rhs != n_R) { 4471 for (i = 0; i < n_eff_constraints; i++) { 4472 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4473 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4474 PetscCall(VecResetArray(dummy_vec)); 4475 } 4476 } else { 4477 for (i = 0; i < n_eff_constraints; i++) { 4478 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4479 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4480 PetscCall(VecResetArray(pcbddc->vec1_R)); 4481 } 4482 } 4483 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4484 } 4485 } else { 4486 const PetscScalar *barr; 4487 PetscScalar *marr; 4488 4489 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4490 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4491 for (i = 0; i < n_eff_constraints; i++) { 4492 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4493 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4494 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4495 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4496 PetscCall(VecResetArray(pcbddc->vec1_R)); 4497 PetscCall(VecResetArray(pcbddc->vec2_R)); 4498 } 4499 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4500 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4501 } 4502 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 4503 PetscCall(MatDestroy(&Brhs)); 4504 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4505 if (!pcbddc->switch_static) { 4506 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2)); 4507 for (i = 0; i < n_eff_constraints; i++) { 4508 Vec r, b; 4509 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 4510 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 4511 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4512 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4513 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 4514 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 4515 } 4516 if (multi_element) { 4517 Mat T; 4518 4519 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4520 PetscCall(MatDestroy(&local_auxmat2_R)); 4521 local_auxmat2_R = T; 4522 PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T)); 4523 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4524 pcbddc->local_auxmat2 = T; 4525 } 4526 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4527 } else { 4528 if (multi_element) { 4529 Mat T; 4530 4531 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4532 PetscCall(MatDestroy(&local_auxmat2_R)); 4533 local_auxmat2_R = T; 4534 } 4535 if (lda_rhs != n_R) { 4536 PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 4537 } else { 4538 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4539 pcbddc->local_auxmat2 = local_auxmat2_R; 4540 } 4541 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4542 } 4543 PetscCall(MatScale(S_CC, m_one)); 4544 if (multi_element) { 4545 Mat T, T2; 4546 IS isp, ispi; 4547 4548 isp = is_C_perm; 4549 4550 PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi)); 4551 PetscCall(MatPermute(S_CC, isp, isp, &T)); 4552 PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2)); 4553 PetscCall(MatDestroy(&T)); 4554 PetscCall(MatDestroy(&S_CC)); 4555 PetscCall(MatPermute(T2, ispi, ispi, &S_CC)); 4556 PetscCall(MatDestroy(&T2)); 4557 PetscCall(ISDestroy(&ispi)); 4558 } else { 4559 if (isCHOL) { 4560 PetscCall(MatCholeskyFactor(S_CC, NULL, NULL)); 4561 } else { 4562 PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL)); 4563 } 4564 PetscCall(MatSeqDenseInvertFactors_Private(S_CC)); 4565 } 4566 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4567 PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1)); 4568 PetscCall(MatDestroy(&C_B)); 4569 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES)); 4570 } 4571 4572 /* Get submatrices from subdomain matrix */ 4573 if (n_vertices) { 4574 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4575 PetscBool oldpin; 4576 #endif 4577 IS is_aux; 4578 4579 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4580 IS tis; 4581 4582 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4583 PetscCall(ISSort(tis)); 4584 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4585 PetscCall(ISDestroy(&tis)); 4586 } else { 4587 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4588 } 4589 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4590 oldpin = pcbddc->local_mat->boundtocpu; 4591 #endif 4592 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4593 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4594 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4595 /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4596 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4597 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4598 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4599 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4600 #endif 4601 PetscCall(ISDestroy(&is_aux)); 4602 } 4603 PetscCall(ISDestroy(&is_C_perm)); 4604 PetscCall(PetscFree(C_bss)); 4605 4606 p0_lidx_I = NULL; 4607 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4608 const PetscInt *idxs; 4609 4610 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4611 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4612 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4613 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4614 } 4615 4616 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4617 4618 /* Matrices of coarse basis functions (local) */ 4619 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4620 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4621 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4622 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4623 if (!multi_element) { 4624 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B)); 4625 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D)); 4626 coarse_phi_multi = NULL; 4627 } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */ 4628 IS is_rows[2] = {pcbddc->is_R_local, NULL}; 4629 IS is_cols[2] = {is_V, is_C}; 4630 4631 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1])); 4632 PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi)); 4633 PetscCall(ISDestroy(&is_rows[1])); 4634 } 4635 4636 /* vertices */ 4637 if (n_vertices) { 4638 PetscBool restoreavr = PETSC_FALSE; 4639 Mat A_RRmA_RV = NULL; 4640 4641 PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4642 PetscCall(MatDestroy(&A_VV)); 4643 4644 if (n_R) { 4645 Mat A_RV_bcorr = NULL, S_VV; 4646 4647 PetscCall(MatScale(A_RV, m_one)); 4648 if (need_benign_correction) { 4649 ISLocalToGlobalMapping RtoN; 4650 IS is_p0; 4651 PetscInt *idxs_p0, n; 4652 4653 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4654 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4655 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4656 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4657 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4658 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4659 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4660 PetscCall(ISDestroy(&is_p0)); 4661 } 4662 4663 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV)); 4664 if (!sparserhs || need_benign_correction) { 4665 if (lda_rhs == n_R && !multi_element) { 4666 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4667 } else { 4668 Mat T; 4669 PetscScalar *av, *array; 4670 const PetscInt *xadj, *adjncy; 4671 PetscInt n; 4672 PetscBool flg_row; 4673 4674 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T)); 4675 PetscCall(MatDenseGetArrayWrite(T, &array)); 4676 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4677 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4678 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4679 for (i = 0; i < n; i++) { 4680 PetscInt j; 4681 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j]; 4682 } 4683 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4684 PetscCall(MatDenseRestoreArrayWrite(T, &array)); 4685 PetscCall(MatDestroy(&A_RV)); 4686 A_RV = T; 4687 } 4688 if (need_benign_correction) { 4689 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4690 PetscScalar *marr; 4691 4692 /* XXX multi_element */ 4693 PetscCall(MatDenseGetArray(A_RV, &marr)); 4694 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4695 4696 | 0 0 0 | (V) 4697 L = | 0 0 -1 | (P-p0) 4698 | 0 0 -1 | (p0) 4699 4700 */ 4701 for (i = 0; i < reuse_solver->benign_n; i++) { 4702 const PetscScalar *vals; 4703 const PetscInt *idxs, *idxs_zero; 4704 PetscInt n, j, nz; 4705 4706 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4707 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4708 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4709 for (j = 0; j < n; j++) { 4710 PetscScalar val = vals[j]; 4711 PetscInt k, col = idxs[j]; 4712 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4713 } 4714 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4715 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4716 } 4717 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4718 } 4719 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4720 Brhs = A_RV; 4721 } else { 4722 Mat tA_RVT, A_RVT; 4723 4724 if (!pcbddc->symmetric_primal) { 4725 /* A_RV already scaled by -1 */ 4726 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4727 } else { 4728 restoreavr = PETSC_TRUE; 4729 PetscCall(MatScale(A_VR, -1.0)); 4730 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4731 A_RVT = A_VR; 4732 } 4733 if (lda_rhs != n_R) { 4734 PetscScalar *aa; 4735 PetscInt r, *ii, *jj; 4736 PetscBool done; 4737 4738 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4739 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4740 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4741 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4742 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4743 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4744 } else { 4745 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4746 tA_RVT = A_RVT; 4747 } 4748 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4749 PetscCall(MatDestroy(&tA_RVT)); 4750 PetscCall(MatDestroy(&A_RVT)); 4751 } 4752 if (F) { 4753 /* need to correct the rhs */ 4754 if (need_benign_correction) { 4755 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4756 PetscScalar *marr; 4757 4758 PetscCall(MatDenseGetArray(Brhs, &marr)); 4759 if (lda_rhs != n_R) { 4760 for (i = 0; i < n_eff_vertices; i++) { 4761 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4762 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4763 PetscCall(VecResetArray(dummy_vec)); 4764 } 4765 } else { 4766 for (i = 0; i < n_eff_vertices; i++) { 4767 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4768 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4769 PetscCall(VecResetArray(pcbddc->vec1_R)); 4770 } 4771 } 4772 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4773 } 4774 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4775 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4776 /* need to correct the solution */ 4777 if (need_benign_correction) { 4778 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4779 PetscScalar *marr; 4780 4781 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4782 if (lda_rhs != n_R) { 4783 for (i = 0; i < n_eff_vertices; i++) { 4784 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4785 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4786 PetscCall(VecResetArray(dummy_vec)); 4787 } 4788 } else { 4789 for (i = 0; i < n_eff_vertices; i++) { 4790 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4791 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4792 PetscCall(VecResetArray(pcbddc->vec1_R)); 4793 } 4794 } 4795 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4796 } 4797 } else { 4798 const PetscScalar *barr; 4799 PetscScalar *marr; 4800 4801 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4802 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4803 for (i = 0; i < n_eff_vertices; i++) { 4804 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4805 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4806 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4807 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4808 PetscCall(VecResetArray(pcbddc->vec1_R)); 4809 PetscCall(VecResetArray(pcbddc->vec2_R)); 4810 } 4811 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4812 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4813 } 4814 PetscCall(MatDestroy(&A_RV)); 4815 PetscCall(MatDestroy(&Brhs)); 4816 /* S_VV and S_CV */ 4817 if (n_constraints) { 4818 Mat B; 4819 4820 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B)); 4821 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B)); 4822 4823 /* S_CV = pcbddc->local_auxmat1 * B */ 4824 if (multi_element) { 4825 Mat T; 4826 4827 PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T)); 4828 PetscCall(MatDestroy(&B)); 4829 B = T; 4830 } 4831 PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV)); 4832 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4833 PetscCall(MatProductSetFromOptions(S_CV)); 4834 PetscCall(MatProductSymbolic(S_CV)); 4835 PetscCall(MatProductNumeric(S_CV)); 4836 PetscCall(MatProductClear(S_CV)); 4837 PetscCall(MatDestroy(&B)); 4838 4839 /* B = local_auxmat2_R * S_CV */ 4840 PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B)); 4841 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4842 PetscCall(MatProductSetFromOptions(B)); 4843 PetscCall(MatProductSymbolic(B)); 4844 PetscCall(MatProductNumeric(B)); 4845 4846 PetscCall(MatScale(S_CV, m_one)); 4847 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES)); 4848 4849 if (multi_element) { 4850 Mat T; 4851 4852 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4853 PetscCall(MatDestroy(&A_RRmA_RV)); 4854 A_RRmA_RV = T; 4855 } 4856 PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */ 4857 PetscCall(MatDestroy(&B)); 4858 } else if (multi_element) { 4859 Mat T; 4860 4861 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4862 PetscCall(MatDestroy(&A_RRmA_RV)); 4863 A_RRmA_RV = T; 4864 } 4865 4866 if (lda_rhs != n_R) { 4867 Mat T; 4868 4869 PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T)); 4870 PetscCall(MatDestroy(&A_RRmA_RV)); 4871 A_RRmA_RV = T; 4872 } 4873 4874 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4875 if (need_benign_correction) { /* XXX SPARSE */ 4876 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4877 PetscScalar *sums; 4878 const PetscScalar *marr; 4879 4880 PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr)); 4881 PetscCall(PetscMalloc1(n_vertices, &sums)); 4882 for (i = 0; i < reuse_solver->benign_n; i++) { 4883 const PetscScalar *vals; 4884 const PetscInt *idxs, *idxs_zero; 4885 PetscInt n, j, nz; 4886 4887 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4888 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4889 for (j = 0; j < n_vertices; j++) { 4890 sums[j] = 0.; 4891 for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R]; 4892 } 4893 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4894 for (j = 0; j < n; j++) { 4895 PetscScalar val = vals[j]; 4896 for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES)); 4897 } 4898 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4899 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4900 } 4901 PetscCall(PetscFree(sums)); 4902 PetscCall(MatDestroy(&A_RV_bcorr)); 4903 PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr)); 4904 } 4905 4906 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV)); 4907 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4908 PetscCall(MatDestroy(&S_VV)); 4909 } 4910 4911 /* coarse basis functions */ 4912 if (coarse_phi_multi) { 4913 Mat Vid; 4914 4915 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid)); 4916 PetscCall(MatShift_Basic(Vid, 1.0)); 4917 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV)); 4918 PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid)); 4919 PetscCall(MatDestroy(&Vid)); 4920 } else { 4921 if (A_RRmA_RV) { 4922 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B)); 4923 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4924 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D)); 4925 if (pcbddc->benign_n) { 4926 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); 4927 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 4928 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 4929 } 4930 } 4931 } 4932 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES)); 4933 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4934 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4935 } 4936 PetscCall(MatDestroy(&A_RRmA_RV)); 4937 } 4938 PetscCall(MatDestroy(&A_RV)); 4939 PetscCall(VecDestroy(&dummy_vec)); 4940 4941 if (n_constraints) { 4942 Mat B, B2; 4943 4944 PetscCall(MatScale(S_CC, m_one)); 4945 PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B)); 4946 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4947 PetscCall(MatProductSetFromOptions(B)); 4948 PetscCall(MatProductSymbolic(B)); 4949 PetscCall(MatProductNumeric(B)); 4950 4951 if (n_vertices) { 4952 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4953 PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC)); 4954 } else { 4955 if (lda_rhs != n_R) { 4956 Mat tB; 4957 4958 PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB)); 4959 PetscCall(MatDestroy(&B)); 4960 B = tB; 4961 } 4962 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC)); 4963 } 4964 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES)); 4965 } 4966 4967 /* coarse basis functions */ 4968 if (coarse_phi_multi) { 4969 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B)); 4970 } else { 4971 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4972 PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2)); 4973 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2)); 4974 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4975 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4976 PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2)); 4977 if (pcbddc->benign_n) { 4978 for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); 4979 } 4980 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2)); 4981 } 4982 } 4983 PetscCall(MatDestroy(&B)); 4984 } 4985 4986 /* assemble sparse coarse basis functions */ 4987 if (coarse_phi_multi) { 4988 Mat T; 4989 4990 PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T)); 4991 PetscCall(MatDestroy(&coarse_phi_multi)); 4992 PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B)); 4993 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); 4994 PetscCall(MatDestroy(&T)); 4995 } 4996 PetscCall(MatDestroy(&local_auxmat2_R)); 4997 PetscCall(PetscFree(p0_lidx_I)); 4998 4999 /* coarse matrix entries relative to B_0 */ 5000 if (pcbddc->benign_n) { 5001 Mat B0_B, B0_BPHI; 5002 IS is_dummy; 5003 const PetscScalar *data; 5004 PetscInt j; 5005 5006 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5007 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5008 PetscCall(ISDestroy(&is_dummy)); 5009 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5010 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5011 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 5012 for (j = 0; j < pcbddc->benign_n; j++) { 5013 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5014 for (i = 0; i < pcbddc->local_primal_size; i++) { 5015 PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5016 PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5017 } 5018 } 5019 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 5020 PetscCall(MatDestroy(&B0_B)); 5021 PetscCall(MatDestroy(&B0_BPHI)); 5022 } 5023 5024 /* compute other basis functions for non-symmetric problems */ 5025 if (!pcbddc->symmetric_primal) { 5026 Mat B_V = NULL, B_C = NULL; 5027 PetscScalar *marray, *work; 5028 5029 /* TODO multi_element MatDenseScatter */ 5030 if (n_constraints) { 5031 Mat S_CCT, C_CRT; 5032 5033 PetscCall(MatScale(S_CC, m_one)); 5034 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 5035 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 5036 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C)); 5037 PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C)); 5038 PetscCall(MatDestroy(&S_CCT)); 5039 if (n_vertices) { 5040 Mat S_VCT; 5041 5042 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 5043 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V)); 5044 PetscCall(MatDestroy(&S_VCT)); 5045 PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V)); 5046 } 5047 PetscCall(MatDestroy(&C_CRT)); 5048 } else { 5049 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 5050 } 5051 if (n_vertices && n_R) { 5052 PetscScalar *av, *marray; 5053 const PetscInt *xadj, *adjncy; 5054 PetscInt n; 5055 PetscBool flg_row; 5056 5057 /* B_V = B_V - A_VR^T */ 5058 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 5059 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5060 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 5061 PetscCall(MatDenseGetArray(B_V, &marray)); 5062 for (i = 0; i < n; i++) { 5063 PetscInt j; 5064 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 5065 } 5066 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5067 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5068 PetscCall(MatDestroy(&A_VR)); 5069 } 5070 5071 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 5072 PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work)); 5073 if (n_vertices) { 5074 PetscCall(MatDenseGetArray(B_V, &marray)); 5075 for (i = 0; i < n_vertices; i++) { 5076 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 5077 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5078 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5079 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5080 PetscCall(VecResetArray(pcbddc->vec1_R)); 5081 PetscCall(VecResetArray(pcbddc->vec2_R)); 5082 } 5083 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5084 } 5085 if (B_C) { 5086 PetscCall(MatDenseGetArray(B_C, &marray)); 5087 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 5088 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 5089 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5090 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5091 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5092 PetscCall(VecResetArray(pcbddc->vec1_R)); 5093 PetscCall(VecResetArray(pcbddc->vec2_R)); 5094 } 5095 PetscCall(MatDenseRestoreArray(B_C, &marray)); 5096 } 5097 /* coarse basis functions */ 5098 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B)); 5099 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D)); 5100 for (i = 0; i < pcbddc->local_primal_size; i++) { 5101 Vec v; 5102 5103 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 5104 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 5105 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5106 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5107 if (i < n_vertices) { 5108 PetscScalar one = 1.0; 5109 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 5110 PetscCall(VecAssemblyBegin(v)); 5111 PetscCall(VecAssemblyEnd(v)); 5112 } 5113 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 5114 5115 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5116 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 5117 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5118 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5119 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 5120 } 5121 PetscCall(VecResetArray(pcbddc->vec1_R)); 5122 } 5123 PetscCall(MatDestroy(&B_V)); 5124 PetscCall(MatDestroy(&B_C)); 5125 PetscCall(PetscFree(work)); 5126 } else { 5127 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 5128 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 5129 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 5130 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 5131 } 5132 PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5133 PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5134 5135 /* free memory */ 5136 PetscCall(PetscFree(V_to_eff_V)); 5137 PetscCall(PetscFree(C_to_eff_C)); 5138 PetscCall(PetscFree(R_eff_V_J)); 5139 PetscCall(PetscFree(R_eff_C_J)); 5140 PetscCall(PetscFree(B_eff_V_J)); 5141 PetscCall(PetscFree(B_eff_C_J)); 5142 PetscCall(ISDestroy(&is_R)); 5143 PetscCall(ISRestoreIndices(is_V, &idx_V)); 5144 PetscCall(ISRestoreIndices(is_C, &idx_C)); 5145 PetscCall(ISDestroy(&is_V)); 5146 PetscCall(ISDestroy(&is_C)); 5147 PetscCall(PetscFree(idx_V_B)); 5148 PetscCall(MatDestroy(&S_CV)); 5149 PetscCall(MatDestroy(&S_VC)); 5150 PetscCall(MatDestroy(&S_CC)); 5151 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 5152 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 5153 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 5154 5155 /* Checking coarse_sub_mat and coarse basis functions */ 5156 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5157 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5158 if (pcbddc->dbg_flag) { 5159 Mat AUXMAT, TM1, TM2, TM3, TM4; 5160 Mat coarse_phi_D, coarse_phi_B; 5161 Mat coarse_psi_D, coarse_psi_B; 5162 Mat A_II, A_BB, A_IB, A_BI; 5163 Mat C_B, CPHI; 5164 IS is_dummy; 5165 Vec mones; 5166 MatType checkmattype = MATSEQAIJ; 5167 PetscReal real_value; 5168 5169 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5170 Mat A; 5171 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 5172 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 5173 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 5174 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 5175 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 5176 PetscCall(MatDestroy(&A)); 5177 } else { 5178 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 5179 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 5180 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 5181 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 5182 } 5183 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 5184 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 5185 if (!pcbddc->symmetric_primal) { 5186 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 5187 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 5188 } 5189 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5190 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 5191 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5192 if (!pcbddc->symmetric_primal) { 5193 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5194 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5195 PetscCall(MatDestroy(&AUXMAT)); 5196 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5197 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5198 PetscCall(MatDestroy(&AUXMAT)); 5199 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5200 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5201 PetscCall(MatDestroy(&AUXMAT)); 5202 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5203 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5204 PetscCall(MatDestroy(&AUXMAT)); 5205 } else { 5206 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5207 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5208 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5209 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5210 PetscCall(MatDestroy(&AUXMAT)); 5211 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5212 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5213 PetscCall(MatDestroy(&AUXMAT)); 5214 } 5215 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 5216 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 5217 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 5218 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 5219 if (pcbddc->benign_n) { 5220 Mat B0_B, B0_BPHI; 5221 const PetscScalar *data2; 5222 PetscScalar *data; 5223 PetscInt j; 5224 5225 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5226 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5227 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5228 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5229 PetscCall(MatDenseGetArray(TM1, &data)); 5230 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 5231 for (j = 0; j < pcbddc->benign_n; j++) { 5232 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5233 for (i = 0; i < pcbddc->local_primal_size; i++) { 5234 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 5235 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 5236 } 5237 } 5238 PetscCall(MatDenseRestoreArray(TM1, &data)); 5239 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 5240 PetscCall(MatDestroy(&B0_B)); 5241 PetscCall(ISDestroy(&is_dummy)); 5242 PetscCall(MatDestroy(&B0_BPHI)); 5243 } 5244 PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN)); 5245 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 5246 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5247 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5248 5249 /* check constraints */ 5250 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 5251 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 5252 if (!pcbddc->benign_n) { /* TODO: add benign case */ 5253 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5254 } else { 5255 PetscScalar *data; 5256 Mat tmat; 5257 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 5258 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 5259 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 5260 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5261 PetscCall(MatDestroy(&tmat)); 5262 } 5263 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 5264 PetscCall(VecSet(mones, -1.0)); 5265 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5266 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5267 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5268 if (!pcbddc->symmetric_primal) { 5269 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 5270 PetscCall(VecSet(mones, -1.0)); 5271 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5272 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5273 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5274 } 5275 PetscCall(MatDestroy(&C_B)); 5276 PetscCall(MatDestroy(&CPHI)); 5277 PetscCall(ISDestroy(&is_dummy)); 5278 PetscCall(VecDestroy(&mones)); 5279 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5280 PetscCall(MatDestroy(&A_II)); 5281 PetscCall(MatDestroy(&A_BB)); 5282 PetscCall(MatDestroy(&A_IB)); 5283 PetscCall(MatDestroy(&A_BI)); 5284 PetscCall(MatDestroy(&TM1)); 5285 PetscCall(MatDestroy(&TM2)); 5286 PetscCall(MatDestroy(&TM3)); 5287 PetscCall(MatDestroy(&TM4)); 5288 PetscCall(MatDestroy(&coarse_phi_D)); 5289 PetscCall(MatDestroy(&coarse_phi_B)); 5290 if (!pcbddc->symmetric_primal) { 5291 PetscCall(MatDestroy(&coarse_psi_D)); 5292 PetscCall(MatDestroy(&coarse_psi_B)); 5293 } 5294 } 5295 5296 #if 0 5297 { 5298 PetscViewer viewer; 5299 char filename[256]; 5300 5301 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 5302 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 5303 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 5304 PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat")); 5305 PetscCall(MatView(*coarse_submat,viewer)); 5306 if (pcbddc->coarse_phi_B) { 5307 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 5308 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 5309 } 5310 if (pcbddc->coarse_phi_D) { 5311 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 5312 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 5313 } 5314 if (pcbddc->coarse_psi_B) { 5315 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 5316 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 5317 } 5318 if (pcbddc->coarse_psi_D) { 5319 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 5320 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 5321 } 5322 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 5323 PetscCall(MatView(pcbddc->local_mat,viewer)); 5324 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 5325 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 5326 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 5327 PetscCall(ISView(pcis->is_I_local,viewer)); 5328 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 5329 PetscCall(ISView(pcis->is_B_local,viewer)); 5330 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 5331 PetscCall(ISView(pcbddc->is_R_local,viewer)); 5332 PetscCall(PetscViewerDestroy(&viewer)); 5333 } 5334 #endif 5335 5336 /* device support */ 5337 { 5338 PetscBool iscuda, iship, iskokkos; 5339 MatType mtype = NULL; 5340 5341 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, "")); 5342 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, "")); 5343 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, "")); 5344 if (iskokkos) { 5345 if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE; 5346 else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE; 5347 } 5348 if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP); 5349 else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP; 5350 else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA; 5351 if (mtype) { 5352 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 5353 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 5354 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 5355 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 5356 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 5357 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 5358 } 5359 } 5360 PetscFunctionReturn(PETSC_SUCCESS); 5361 } 5362 5363 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 5364 { 5365 Mat *work_mat; 5366 IS isrow_s, iscol_s; 5367 PetscBool rsorted, csorted; 5368 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 5369 5370 PetscFunctionBegin; 5371 PetscCall(ISSorted(isrow, &rsorted)); 5372 PetscCall(ISSorted(iscol, &csorted)); 5373 PetscCall(ISGetLocalSize(isrow, &rsize)); 5374 PetscCall(ISGetLocalSize(iscol, &csize)); 5375 5376 if (!rsorted) { 5377 const PetscInt *idxs; 5378 PetscInt *idxs_sorted, i; 5379 5380 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 5381 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 5382 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 5383 PetscCall(ISGetIndices(isrow, &idxs)); 5384 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 5385 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 5386 PetscCall(ISRestoreIndices(isrow, &idxs)); 5387 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 5388 } else { 5389 PetscCall(PetscObjectReference((PetscObject)isrow)); 5390 isrow_s = isrow; 5391 } 5392 5393 if (!csorted) { 5394 if (isrow == iscol) { 5395 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 5396 iscol_s = isrow_s; 5397 } else { 5398 const PetscInt *idxs; 5399 PetscInt *idxs_sorted, i; 5400 5401 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 5402 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 5403 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 5404 PetscCall(ISGetIndices(iscol, &idxs)); 5405 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 5406 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5407 PetscCall(ISRestoreIndices(iscol, &idxs)); 5408 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 5409 } 5410 } else { 5411 PetscCall(PetscObjectReference((PetscObject)iscol)); 5412 iscol_s = iscol; 5413 } 5414 5415 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 5416 5417 if (!rsorted || !csorted) { 5418 Mat new_mat; 5419 IS is_perm_r, is_perm_c; 5420 5421 if (!rsorted) { 5422 PetscInt *idxs_r, i; 5423 PetscCall(PetscMalloc1(rsize, &idxs_r)); 5424 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 5425 PetscCall(PetscFree(idxs_perm_r)); 5426 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 5427 } else { 5428 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 5429 } 5430 PetscCall(ISSetPermutation(is_perm_r)); 5431 5432 if (!csorted) { 5433 if (isrow_s == iscol_s) { 5434 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5435 is_perm_c = is_perm_r; 5436 } else { 5437 PetscInt *idxs_c, i; 5438 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 5439 PetscCall(PetscMalloc1(csize, &idxs_c)); 5440 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 5441 PetscCall(PetscFree(idxs_perm_c)); 5442 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 5443 } 5444 } else { 5445 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 5446 } 5447 PetscCall(ISSetPermutation(is_perm_c)); 5448 5449 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 5450 PetscCall(MatDestroy(&work_mat[0])); 5451 work_mat[0] = new_mat; 5452 PetscCall(ISDestroy(&is_perm_r)); 5453 PetscCall(ISDestroy(&is_perm_c)); 5454 } 5455 5456 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5457 *B = work_mat[0]; 5458 PetscCall(MatDestroyMatrices(1, &work_mat)); 5459 PetscCall(ISDestroy(&isrow_s)); 5460 PetscCall(ISDestroy(&iscol_s)); 5461 PetscFunctionReturn(PETSC_SUCCESS); 5462 } 5463 5464 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5465 { 5466 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5467 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5468 Mat new_mat, lA; 5469 IS is_local, is_global; 5470 PetscInt local_size; 5471 PetscBool isseqaij, issym, isset; 5472 5473 PetscFunctionBegin; 5474 PetscCall(MatDestroy(&pcbddc->local_mat)); 5475 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 5476 if (pcbddc->mat_graph->multi_element) { 5477 Mat *mats, *bdiags; 5478 IS *gsubs; 5479 PetscInt nsubs = pcbddc->n_local_subs; 5480 5481 PetscCall(PetscCalloc1(nsubs * nsubs, &mats)); 5482 PetscCall(PetscMalloc1(nsubs, &gsubs)); 5483 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i])); 5484 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags)); 5485 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i])); 5486 PetscCall(PetscFree(gsubs)); 5487 5488 for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i]; 5489 PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat)); 5490 PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat)); 5491 PetscCall(MatDestroySubMatrices(nsubs, &bdiags)); 5492 PetscCall(PetscFree(mats)); 5493 } else { 5494 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5495 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5496 PetscCall(ISDestroy(&is_local)); 5497 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 5498 PetscCall(ISDestroy(&is_global)); 5499 } 5500 if (pcbddc->dbg_flag) { 5501 Vec x, x_change; 5502 PetscReal error; 5503 5504 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 5505 PetscCall(VecSetRandom(x, NULL)); 5506 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 5507 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5508 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5509 PetscCall(MatMult(new_mat, matis->x, matis->y)); 5510 if (!pcbddc->change_interior) { 5511 const PetscScalar *x, *y, *v; 5512 PetscReal lerror = 0.; 5513 PetscInt i; 5514 5515 PetscCall(VecGetArrayRead(matis->x, &x)); 5516 PetscCall(VecGetArrayRead(matis->y, &y)); 5517 PetscCall(VecGetArrayRead(matis->counter, &v)); 5518 for (i = 0; i < local_size; i++) 5519 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 5520 PetscCall(VecRestoreArrayRead(matis->x, &x)); 5521 PetscCall(VecRestoreArrayRead(matis->y, &y)); 5522 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 5523 PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 5524 if (error > PETSC_SMALL) { 5525 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5526 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 5527 } else { 5528 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 5529 } 5530 } 5531 } 5532 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5533 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5534 PetscCall(VecAXPY(x, -1.0, x_change)); 5535 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 5536 if (error > PETSC_SMALL) { 5537 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5538 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 5539 } else { 5540 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 5541 } 5542 } 5543 PetscCall(VecDestroy(&x)); 5544 PetscCall(VecDestroy(&x_change)); 5545 } 5546 5547 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5548 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 5549 5550 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5551 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 5552 if (isseqaij) { 5553 PetscCall(MatDestroy(&pcbddc->local_mat)); 5554 PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5555 if (lA) { 5556 Mat work; 5557 PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5558 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5559 PetscCall(MatDestroy(&work)); 5560 } 5561 } else { 5562 Mat work_mat; 5563 5564 PetscCall(MatDestroy(&pcbddc->local_mat)); 5565 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5566 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5567 PetscCall(MatDestroy(&work_mat)); 5568 if (lA) { 5569 Mat work; 5570 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5571 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5572 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5573 PetscCall(MatDestroy(&work)); 5574 } 5575 } 5576 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 5577 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 5578 PetscCall(MatDestroy(&new_mat)); 5579 PetscFunctionReturn(PETSC_SUCCESS); 5580 } 5581 5582 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5583 { 5584 PC_IS *pcis = (PC_IS *)pc->data; 5585 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5586 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5587 PetscInt *idx_R_local = NULL; 5588 PetscInt n_vertices, i, j, n_R, n_D, n_B; 5589 PetscInt vbs, bs; 5590 PetscBT bitmask = NULL; 5591 5592 PetscFunctionBegin; 5593 /* 5594 No need to setup local scatters if 5595 - primal space is unchanged 5596 AND 5597 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5598 AND 5599 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5600 */ 5601 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 5602 /* destroy old objects */ 5603 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5604 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5605 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5606 /* Set Non-overlapping dimensions */ 5607 n_B = pcis->n_B; 5608 n_D = pcis->n - n_B; 5609 n_vertices = pcbddc->n_vertices; 5610 5611 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5612 5613 /* create auxiliary bitmask and allocate workspace */ 5614 if (!sub_schurs || !sub_schurs->reuse_solver) { 5615 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5616 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5617 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5618 5619 for (i = 0, n_R = 0; i < pcis->n; i++) { 5620 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5621 } 5622 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5623 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5624 5625 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5626 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5627 } 5628 5629 /* Block code */ 5630 vbs = 1; 5631 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5632 if (bs > 1 && !(n_vertices % bs)) { 5633 PetscBool is_blocked = PETSC_TRUE; 5634 PetscInt *vary; 5635 if (!sub_schurs || !sub_schurs->reuse_solver) { 5636 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5637 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5638 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5639 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5640 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5641 for (i = 0; i < pcis->n / bs; i++) { 5642 if (vary[i] != 0 && vary[i] != bs) { 5643 is_blocked = PETSC_FALSE; 5644 break; 5645 } 5646 } 5647 PetscCall(PetscFree(vary)); 5648 } else { 5649 /* Verify directly the R set */ 5650 for (i = 0; i < n_R / bs; i++) { 5651 PetscInt j, node = idx_R_local[bs * i]; 5652 for (j = 1; j < bs; j++) { 5653 if (node != idx_R_local[bs * i + j] - j) { 5654 is_blocked = PETSC_FALSE; 5655 break; 5656 } 5657 } 5658 } 5659 } 5660 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5661 vbs = bs; 5662 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5663 } 5664 } 5665 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5666 if (sub_schurs && sub_schurs->reuse_solver) { 5667 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5668 5669 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5670 PetscCall(ISDestroy(&reuse_solver->is_R)); 5671 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5672 reuse_solver->is_R = pcbddc->is_R_local; 5673 } else { 5674 PetscCall(PetscFree(idx_R_local)); 5675 } 5676 5677 /* print some info if requested */ 5678 if (pcbddc->dbg_flag) { 5679 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5680 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5681 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5682 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5683 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5684 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5685 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5686 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5687 } 5688 5689 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5690 if (!sub_schurs || !sub_schurs->reuse_solver) { 5691 IS is_aux1, is_aux2; 5692 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5693 5694 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5695 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5696 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5697 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5698 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5699 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5700 for (i = 0, j = 0; i < n_R; i++) { 5701 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5702 } 5703 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5704 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5705 for (i = 0, j = 0; i < n_B; i++) { 5706 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5707 } 5708 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5709 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5710 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5711 PetscCall(ISDestroy(&is_aux1)); 5712 PetscCall(ISDestroy(&is_aux2)); 5713 5714 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5715 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5716 for (i = 0, j = 0; i < n_R; i++) { 5717 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5718 } 5719 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5720 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5721 PetscCall(ISDestroy(&is_aux1)); 5722 } 5723 PetscCall(PetscBTDestroy(&bitmask)); 5724 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5725 } else { 5726 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5727 IS tis; 5728 PetscInt schur_size; 5729 5730 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5731 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5732 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5733 PetscCall(ISDestroy(&tis)); 5734 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5735 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5736 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5737 PetscCall(ISDestroy(&tis)); 5738 } 5739 } 5740 PetscFunctionReturn(PETSC_SUCCESS); 5741 } 5742 5743 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5744 { 5745 MatNullSpace NullSpace; 5746 Mat dmat; 5747 const Vec *nullvecs; 5748 Vec v, v2, *nullvecs2; 5749 VecScatter sct = NULL; 5750 PetscScalar *ddata; 5751 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5752 PetscBool nnsp_has_cnst; 5753 5754 PetscFunctionBegin; 5755 if (!is && !B) { /* MATIS */ 5756 Mat_IS *matis = (Mat_IS *)A->data; 5757 5758 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5759 sct = matis->cctx; 5760 PetscCall(PetscObjectReference((PetscObject)sct)); 5761 } else { 5762 PetscCall(MatGetNullSpace(B, &NullSpace)); 5763 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5764 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5765 } 5766 PetscCall(MatGetNullSpace(A, &NullSpace)); 5767 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5768 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5769 5770 PetscCall(MatCreateVecs(A, &v, NULL)); 5771 PetscCall(MatCreateVecs(B, &v2, NULL)); 5772 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5773 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs)); 5774 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5775 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5776 PetscCall(VecGetBlockSize(v2, &bs)); 5777 PetscCall(VecGetSize(v2, &N)); 5778 PetscCall(VecGetLocalSize(v2, &n)); 5779 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5780 for (k = 0; k < nnsp_size; k++) { 5781 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5782 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5783 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5784 } 5785 if (nnsp_has_cnst) { 5786 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5787 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5788 } 5789 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5790 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5791 5792 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5793 PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault)); 5794 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5795 PetscCall(MatDestroy(&dmat)); 5796 5797 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5798 PetscCall(PetscFree(nullvecs2)); 5799 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5800 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5801 PetscCall(VecDestroy(&v)); 5802 PetscCall(VecDestroy(&v2)); 5803 PetscCall(VecScatterDestroy(&sct)); 5804 PetscFunctionReturn(PETSC_SUCCESS); 5805 } 5806 5807 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5808 { 5809 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5810 PC_IS *pcis = (PC_IS *)pc->data; 5811 PC pc_temp; 5812 Mat A_RR; 5813 MatNullSpace nnsp; 5814 MatReuse reuse; 5815 PetscScalar m_one = -1.0; 5816 PetscReal value; 5817 PetscInt n_D, n_R; 5818 PetscBool issbaij, opts, isset, issym; 5819 PetscBool f = PETSC_FALSE; 5820 char dir_prefix[256], neu_prefix[256], str_level[16]; 5821 size_t len; 5822 5823 PetscFunctionBegin; 5824 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5825 /* approximate solver, propagate NearNullSpace if needed */ 5826 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5827 MatNullSpace gnnsp1, gnnsp2; 5828 PetscBool lhas, ghas; 5829 5830 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5831 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5832 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5833 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5834 PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5835 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5836 } 5837 5838 /* compute prefixes */ 5839 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5840 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5841 if (!pcbddc->current_level) { 5842 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5843 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5844 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5845 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5846 } else { 5847 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 5848 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5849 len -= 15; /* remove "pc_bddc_coarse_" */ 5850 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5851 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5852 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5853 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5854 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5855 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5856 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5857 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5858 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5859 } 5860 5861 /* DIRICHLET PROBLEM */ 5862 if (dirichlet) { 5863 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5864 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5865 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5866 if (pcbddc->dbg_flag) { 5867 Mat A_IIn; 5868 5869 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5870 PetscCall(MatDestroy(&pcis->A_II)); 5871 pcis->A_II = A_IIn; 5872 } 5873 } 5874 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5875 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5876 5877 /* Matrix for Dirichlet problem is pcis->A_II */ 5878 n_D = pcis->n - pcis->n_B; 5879 opts = PETSC_FALSE; 5880 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5881 opts = PETSC_TRUE; 5882 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5883 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 5884 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5885 /* default */ 5886 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5887 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5888 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5889 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5890 if (issbaij) { 5891 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5892 } else { 5893 PetscCall(PCSetType(pc_temp, PCLU)); 5894 } 5895 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5896 } 5897 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5898 PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view")); 5899 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5900 /* Allow user's customization */ 5901 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5902 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5903 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5904 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5905 } 5906 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5907 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5908 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5909 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5910 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5911 const PetscInt *idxs; 5912 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5913 5914 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5915 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5916 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5917 for (i = 0; i < nl; i++) { 5918 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5919 } 5920 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5921 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5922 PetscCall(PetscFree(scoords)); 5923 } 5924 if (sub_schurs && sub_schurs->reuse_solver) { 5925 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5926 5927 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5928 } 5929 5930 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5931 if (!n_D) { 5932 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5933 PetscCall(PCSetType(pc_temp, PCNONE)); 5934 } 5935 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5936 /* set ksp_D into pcis data */ 5937 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5938 PetscCall(KSPDestroy(&pcis->ksp_D)); 5939 pcis->ksp_D = pcbddc->ksp_D; 5940 } 5941 5942 /* NEUMANN PROBLEM */ 5943 A_RR = NULL; 5944 if (neumann) { 5945 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5946 PetscInt ibs, mbs; 5947 PetscBool issbaij, reuse_neumann_solver, isset, issym; 5948 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5949 5950 reuse_neumann_solver = PETSC_FALSE; 5951 if (sub_schurs && sub_schurs->reuse_solver) { 5952 IS iP; 5953 5954 reuse_neumann_solver = PETSC_TRUE; 5955 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 5956 if (iP) reuse_neumann_solver = PETSC_FALSE; 5957 } 5958 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5959 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 5960 if (pcbddc->ksp_R) { /* already created ksp */ 5961 PetscInt nn_R; 5962 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 5963 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5964 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 5965 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5966 PetscCall(KSPReset(pcbddc->ksp_R)); 5967 PetscCall(MatDestroy(&A_RR)); 5968 reuse = MAT_INITIAL_MATRIX; 5969 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5970 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5971 PetscCall(MatDestroy(&A_RR)); 5972 reuse = MAT_INITIAL_MATRIX; 5973 } else { /* safe to reuse the matrix */ 5974 reuse = MAT_REUSE_MATRIX; 5975 } 5976 } 5977 /* last check */ 5978 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5979 PetscCall(MatDestroy(&A_RR)); 5980 reuse = MAT_INITIAL_MATRIX; 5981 } 5982 } else { /* first time, so we need to create the matrix */ 5983 reuse = MAT_INITIAL_MATRIX; 5984 } 5985 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5986 TODO: Get Rid of these conversions */ 5987 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 5988 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 5989 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 5990 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5991 if (matis->A == pcbddc->local_mat) { 5992 PetscCall(MatDestroy(&pcbddc->local_mat)); 5993 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5994 } else { 5995 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5996 } 5997 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */ 5998 if (matis->A == pcbddc->local_mat) { 5999 PetscCall(MatDestroy(&pcbddc->local_mat)); 6000 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6001 } else { 6002 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6003 } 6004 } 6005 /* extract A_RR */ 6006 if (reuse_neumann_solver) { 6007 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6008 6009 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 6010 PetscCall(MatDestroy(&A_RR)); 6011 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 6012 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 6013 } else { 6014 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 6015 } 6016 } else { 6017 PetscCall(MatDestroy(&A_RR)); 6018 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 6019 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6020 } 6021 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 6022 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 6023 } 6024 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 6025 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 6026 opts = PETSC_FALSE; 6027 if (!pcbddc->ksp_R) { /* create object if not present */ 6028 opts = PETSC_TRUE; 6029 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 6030 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 6031 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 6032 /* default */ 6033 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 6034 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 6035 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6036 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 6037 if (issbaij) { 6038 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 6039 } else { 6040 PetscCall(PCSetType(pc_temp, PCLU)); 6041 } 6042 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 6043 } 6044 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 6045 PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view")); 6046 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 6047 if (opts) { /* Allow user's customization once */ 6048 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 6049 } 6050 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6051 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 6052 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 6053 } 6054 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6055 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6056 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 6057 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6058 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6059 const PetscInt *idxs; 6060 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6061 6062 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 6063 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 6064 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6065 for (i = 0; i < nl; i++) { 6066 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6067 } 6068 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 6069 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6070 PetscCall(PetscFree(scoords)); 6071 } 6072 6073 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6074 if (!n_R) { 6075 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6076 PetscCall(PCSetType(pc_temp, PCNONE)); 6077 } 6078 /* Reuse solver if it is present */ 6079 if (reuse_neumann_solver) { 6080 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6081 6082 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 6083 } 6084 PetscCall(KSPSetUp(pcbddc->ksp_R)); 6085 } 6086 6087 if (pcbddc->dbg_flag) { 6088 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6089 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6090 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 6091 } 6092 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 6093 6094 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 6095 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 6096 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 6097 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 6098 /* check Dirichlet and Neumann solvers */ 6099 if (pcbddc->dbg_flag) { 6100 if (dirichlet) { /* Dirichlet */ 6101 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 6102 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 6103 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 6104 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 6105 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 6106 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 6107 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value)); 6108 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6109 } 6110 if (neumann) { /* Neumann */ 6111 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 6112 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 6113 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 6114 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 6115 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 6116 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 6117 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value)); 6118 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6119 } 6120 } 6121 /* free Neumann problem's matrix */ 6122 PetscCall(MatDestroy(&A_RR)); 6123 PetscFunctionReturn(PETSC_SUCCESS); 6124 } 6125 6126 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 6127 { 6128 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6129 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6130 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 6131 6132 PetscFunctionBegin; 6133 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 6134 if (!pcbddc->switch_static) { 6135 if (applytranspose && pcbddc->local_auxmat1) { 6136 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 6137 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6138 } 6139 if (!reuse_solver) { 6140 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6141 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6142 } else { 6143 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6144 6145 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6146 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6147 } 6148 } else { 6149 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6150 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6151 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6152 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6153 if (applytranspose && pcbddc->local_auxmat1) { 6154 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 6155 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6156 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6157 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6158 } 6159 } 6160 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6161 if (!reuse_solver || pcbddc->switch_static) { 6162 if (applytranspose) { 6163 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6164 } else { 6165 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6166 } 6167 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 6168 } else { 6169 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6170 6171 if (applytranspose) { 6172 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6173 } else { 6174 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6175 } 6176 } 6177 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6178 PetscCall(VecSet(inout_B, 0.)); 6179 if (!pcbddc->switch_static) { 6180 if (!reuse_solver) { 6181 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6182 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6183 } else { 6184 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6185 6186 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6187 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6188 } 6189 if (!applytranspose && pcbddc->local_auxmat1) { 6190 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6191 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 6192 } 6193 } else { 6194 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6195 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6196 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6197 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6198 if (!applytranspose && pcbddc->local_auxmat1) { 6199 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6200 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 6201 } 6202 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6203 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6204 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6205 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6206 } 6207 PetscFunctionReturn(PETSC_SUCCESS); 6208 } 6209 6210 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 6211 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 6212 { 6213 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6214 PC_IS *pcis = (PC_IS *)pc->data; 6215 const PetscScalar zero = 0.0; 6216 6217 PetscFunctionBegin; 6218 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 6219 if (!pcbddc->benign_apply_coarse_only) { 6220 if (applytranspose) { 6221 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 6222 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6223 } else { 6224 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 6225 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6226 } 6227 } else { 6228 PetscCall(VecSet(pcbddc->vec1_P, zero)); 6229 } 6230 6231 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 6232 if (pcbddc->benign_n) { 6233 PetscScalar *array; 6234 PetscInt j; 6235 6236 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6237 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 6238 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6239 } 6240 6241 /* start communications from local primal nodes to rhs of coarse solver */ 6242 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 6243 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 6244 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 6245 6246 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 6247 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6248 if (pcbddc->coarse_ksp) { 6249 Mat coarse_mat; 6250 Vec rhs, sol; 6251 MatNullSpace nullsp; 6252 PetscBool isbddc = PETSC_FALSE; 6253 6254 if (pcbddc->benign_have_null) { 6255 PC coarse_pc; 6256 6257 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6258 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 6259 /* we need to propagate to coarser levels the need for a possible benign correction */ 6260 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 6261 PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6262 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 6263 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 6264 } 6265 } 6266 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 6267 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 6268 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 6269 if (applytranspose) { 6270 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 6271 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 6272 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6273 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 6274 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6275 } else { 6276 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 6277 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 6278 PC coarse_pc; 6279 6280 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 6281 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6282 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 6283 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 6284 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 6285 } else { 6286 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 6287 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6288 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6289 } 6290 } 6291 /* we don't need the benign correction at coarser levels anymore */ 6292 if (pcbddc->benign_have_null && isbddc) { 6293 PC coarse_pc; 6294 PC_BDDC *coarsepcbddc; 6295 6296 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6297 coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6298 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 6299 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 6300 } 6301 } 6302 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6303 6304 /* Local solution on R nodes */ 6305 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 6306 /* communications from coarse sol to local primal nodes */ 6307 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 6308 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 6309 6310 /* Sum contributions from the two levels */ 6311 if (!pcbddc->benign_apply_coarse_only) { 6312 if (applytranspose) { 6313 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6314 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6315 } else { 6316 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6317 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6318 } 6319 /* store p0 */ 6320 if (pcbddc->benign_n) { 6321 PetscScalar *array; 6322 PetscInt j; 6323 6324 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6325 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 6326 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6327 } 6328 } else { /* expand the coarse solution */ 6329 if (applytranspose) { 6330 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 6331 } else { 6332 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 6333 } 6334 } 6335 PetscFunctionReturn(PETSC_SUCCESS); 6336 } 6337 6338 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 6339 { 6340 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6341 Vec from, to; 6342 const PetscScalar *array; 6343 6344 PetscFunctionBegin; 6345 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6346 from = pcbddc->coarse_vec; 6347 to = pcbddc->vec1_P; 6348 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6349 Vec tvec; 6350 6351 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6352 PetscCall(VecResetArray(tvec)); 6353 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 6354 PetscCall(VecGetArrayRead(tvec, &array)); 6355 PetscCall(VecPlaceArray(from, array)); 6356 PetscCall(VecRestoreArrayRead(tvec, &array)); 6357 } 6358 } else { /* from local to global -> put data in coarse right-hand side */ 6359 from = pcbddc->vec1_P; 6360 to = pcbddc->coarse_vec; 6361 } 6362 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6363 PetscFunctionReturn(PETSC_SUCCESS); 6364 } 6365 6366 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6367 { 6368 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6369 Vec from, to; 6370 const PetscScalar *array; 6371 6372 PetscFunctionBegin; 6373 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6374 from = pcbddc->coarse_vec; 6375 to = pcbddc->vec1_P; 6376 } else { /* from local to global -> put data in coarse right-hand side */ 6377 from = pcbddc->vec1_P; 6378 to = pcbddc->coarse_vec; 6379 } 6380 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6381 if (smode == SCATTER_FORWARD) { 6382 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6383 Vec tvec; 6384 6385 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6386 PetscCall(VecGetArrayRead(to, &array)); 6387 PetscCall(VecPlaceArray(tvec, array)); 6388 PetscCall(VecRestoreArrayRead(to, &array)); 6389 } 6390 } else { 6391 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6392 PetscCall(VecResetArray(from)); 6393 } 6394 } 6395 PetscFunctionReturn(PETSC_SUCCESS); 6396 } 6397 6398 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6399 { 6400 PC_IS *pcis = (PC_IS *)pc->data; 6401 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6402 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6403 /* one and zero */ 6404 PetscScalar one = 1.0, zero = 0.0; 6405 /* space to store constraints and their local indices */ 6406 PetscScalar *constraints_data; 6407 PetscInt *constraints_idxs, *constraints_idxs_B; 6408 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 6409 PetscInt *constraints_n; 6410 /* iterators */ 6411 PetscInt i, j, k, total_counts, total_counts_cc, cum; 6412 /* BLAS integers */ 6413 PetscBLASInt lwork, lierr; 6414 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 6415 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 6416 /* reuse */ 6417 PetscInt olocal_primal_size, olocal_primal_size_cc; 6418 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 6419 /* change of basis */ 6420 PetscBool qr_needed; 6421 PetscBT change_basis, qr_needed_idx; 6422 /* auxiliary stuff */ 6423 PetscInt *nnz, *is_indices; 6424 PetscInt ncc; 6425 /* some quantities */ 6426 PetscInt n_vertices, total_primal_vertices, valid_constraints; 6427 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 6428 PetscReal tol; /* tolerance for retaining eigenmodes */ 6429 6430 PetscFunctionBegin; 6431 tol = PetscSqrtReal(PETSC_SMALL); 6432 /* Destroy Mat objects computed previously */ 6433 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6434 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6435 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6436 /* save info on constraints from previous setup (if any) */ 6437 olocal_primal_size = pcbddc->local_primal_size; 6438 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6439 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 6440 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 6441 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 6442 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 6443 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6444 6445 if (!pcbddc->adaptive_selection) { 6446 IS ISForVertices, *ISForFaces, *ISForEdges; 6447 MatNullSpace nearnullsp; 6448 const Vec *nearnullvecs; 6449 Vec *localnearnullsp; 6450 PetscScalar *array; 6451 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 6452 PetscBool nnsp_has_cnst; 6453 /* LAPACK working arrays for SVD or POD */ 6454 PetscBool skip_lapack, boolforchange; 6455 PetscScalar *work; 6456 PetscReal *singular_vals; 6457 #if defined(PETSC_USE_COMPLEX) 6458 PetscReal *rwork; 6459 #endif 6460 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 6461 PetscBLASInt dummy_int = 1; 6462 PetscScalar dummy_scalar = 1.; 6463 PetscBool use_pod = PETSC_FALSE; 6464 6465 /* MKL SVD with same input gives different results on different processes! */ 6466 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6467 use_pod = PETSC_TRUE; 6468 #endif 6469 /* Get index sets for faces, edges and vertices from graph */ 6470 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 6471 o_nf = n_ISForFaces; 6472 o_ne = n_ISForEdges; 6473 n_vertices = 0; 6474 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 6475 /* print some info */ 6476 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6477 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6478 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 6479 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6480 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6481 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 6482 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 6483 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 6484 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6485 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6486 } 6487 6488 if (!pcbddc->use_vertices) n_vertices = 0; 6489 if (!pcbddc->use_edges) n_ISForEdges = 0; 6490 if (!pcbddc->use_faces) n_ISForFaces = 0; 6491 6492 /* check if near null space is attached to global mat */ 6493 if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 6494 else nearnullsp = NULL; 6495 6496 if (nearnullsp) { 6497 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 6498 /* remove any stored info */ 6499 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6500 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6501 /* store information for BDDC solver reuse */ 6502 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6503 pcbddc->onearnullspace = nearnullsp; 6504 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 6505 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 6506 } else { /* if near null space is not provided BDDC uses constants by default */ 6507 nnsp_size = 0; 6508 nnsp_has_cnst = PETSC_TRUE; 6509 } 6510 /* get max number of constraints on a single cc */ 6511 max_constraints = nnsp_size; 6512 if (nnsp_has_cnst) max_constraints++; 6513 6514 /* 6515 Evaluate maximum storage size needed by the procedure 6516 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6517 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6518 There can be multiple constraints per connected component 6519 */ 6520 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 6521 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 6522 6523 total_counts = n_ISForFaces + n_ISForEdges; 6524 total_counts *= max_constraints; 6525 total_counts += n_vertices; 6526 PetscCall(PetscBTCreate(total_counts, &change_basis)); 6527 6528 total_counts = 0; 6529 max_size_of_constraint = 0; 6530 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 6531 IS used_is; 6532 if (i < n_ISForEdges) { 6533 used_is = ISForEdges[i]; 6534 } else { 6535 used_is = ISForFaces[i - n_ISForEdges]; 6536 } 6537 PetscCall(ISGetSize(used_is, &j)); 6538 total_counts += j; 6539 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 6540 } 6541 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 6542 6543 /* get local part of global near null space vectors */ 6544 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 6545 for (k = 0; k < nnsp_size; k++) { 6546 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 6547 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6548 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6549 } 6550 6551 /* whether or not to skip lapack calls */ 6552 skip_lapack = PETSC_TRUE; 6553 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6554 6555 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6556 if (!skip_lapack) { 6557 PetscScalar temp_work; 6558 6559 if (use_pod) { 6560 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6561 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 6562 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 6563 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 6564 #if defined(PETSC_USE_COMPLEX) 6565 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 6566 #endif 6567 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6568 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6569 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 6570 lwork = -1; 6571 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6572 #if !defined(PETSC_USE_COMPLEX) 6573 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 6574 #else 6575 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 6576 #endif 6577 PetscCall(PetscFPTrapPop()); 6578 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6579 } else { 6580 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6581 /* SVD */ 6582 PetscInt max_n, min_n; 6583 max_n = max_size_of_constraint; 6584 min_n = max_constraints; 6585 if (max_size_of_constraint < max_constraints) { 6586 min_n = max_size_of_constraint; 6587 max_n = max_constraints; 6588 } 6589 PetscCall(PetscMalloc1(min_n, &singular_vals)); 6590 #if defined(PETSC_USE_COMPLEX) 6591 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 6592 #endif 6593 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6594 lwork = -1; 6595 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 6596 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 6597 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 6598 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6599 #if !defined(PETSC_USE_COMPLEX) 6600 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6601 #else 6602 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6603 #endif 6604 PetscCall(PetscFPTrapPop()); 6605 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6606 #else 6607 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6608 #endif /* on missing GESVD */ 6609 } 6610 /* Allocate optimal workspace */ 6611 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6612 PetscCall(PetscMalloc1(lwork, &work)); 6613 } 6614 /* Now we can loop on constraining sets */ 6615 total_counts = 0; 6616 constraints_idxs_ptr[0] = 0; 6617 constraints_data_ptr[0] = 0; 6618 /* vertices */ 6619 if (n_vertices) { 6620 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6621 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6622 for (i = 0; i < n_vertices; i++) { 6623 constraints_n[total_counts] = 1; 6624 constraints_data[total_counts] = 1.0; 6625 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6626 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6627 total_counts++; 6628 } 6629 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6630 } 6631 6632 /* edges and faces */ 6633 total_counts_cc = total_counts; 6634 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6635 IS used_is; 6636 PetscBool idxs_copied = PETSC_FALSE; 6637 6638 if (ncc < n_ISForEdges) { 6639 used_is = ISForEdges[ncc]; 6640 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6641 } else { 6642 used_is = ISForFaces[ncc - n_ISForEdges]; 6643 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6644 } 6645 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6646 6647 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6648 if (!size_of_constraint) continue; 6649 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6650 if (nnsp_has_cnst) { 6651 PetscScalar quad_value; 6652 6653 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6654 idxs_copied = PETSC_TRUE; 6655 6656 if (!pcbddc->use_nnsp_true) { 6657 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6658 } else { 6659 quad_value = 1.0; 6660 } 6661 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6662 temp_constraints++; 6663 total_counts++; 6664 } 6665 for (k = 0; k < nnsp_size; k++) { 6666 PetscReal real_value; 6667 PetscScalar *ptr_to_data; 6668 6669 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6670 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6671 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6672 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6673 /* check if array is null on the connected component */ 6674 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6675 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6676 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6677 temp_constraints++; 6678 total_counts++; 6679 if (!idxs_copied) { 6680 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6681 idxs_copied = PETSC_TRUE; 6682 } 6683 } 6684 } 6685 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6686 valid_constraints = temp_constraints; 6687 if (!pcbddc->use_nnsp_true && temp_constraints) { 6688 if (temp_constraints == 1) { /* just normalize the constraint */ 6689 PetscScalar norm, *ptr_to_data; 6690 6691 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6692 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6693 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6694 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6695 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6696 } else { /* perform SVD */ 6697 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6698 6699 if (use_pod) { 6700 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6701 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6702 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6703 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6704 from that computed using LAPACKgesvd 6705 -> This is due to a different computation of eigenvectors in LAPACKheev 6706 -> The quality of the POD-computed basis will be the same */ 6707 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6708 /* Store upper triangular part of correlation matrix */ 6709 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6710 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6711 for (j = 0; j < temp_constraints; j++) { 6712 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6713 } 6714 /* compute eigenvalues and eigenvectors of correlation matrix */ 6715 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6716 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6717 #if !defined(PETSC_USE_COMPLEX) 6718 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6719 #else 6720 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6721 #endif 6722 PetscCall(PetscFPTrapPop()); 6723 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6724 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6725 j = 0; 6726 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6727 total_counts = total_counts - j; 6728 valid_constraints = temp_constraints - j; 6729 /* scale and copy POD basis into used quadrature memory */ 6730 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6731 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6732 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6733 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6734 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6735 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6736 if (j < temp_constraints) { 6737 PetscInt ii; 6738 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6739 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6740 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6741 PetscCall(PetscFPTrapPop()); 6742 for (k = 0; k < temp_constraints - j; k++) { 6743 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6744 } 6745 } 6746 } else { 6747 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6748 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6749 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6750 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6751 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6752 #if !defined(PETSC_USE_COMPLEX) 6753 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6754 #else 6755 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6756 #endif 6757 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6758 PetscCall(PetscFPTrapPop()); 6759 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6760 k = temp_constraints; 6761 if (k > size_of_constraint) k = size_of_constraint; 6762 j = 0; 6763 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6764 valid_constraints = k - j; 6765 total_counts = total_counts - temp_constraints + valid_constraints; 6766 #else 6767 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6768 #endif /* on missing GESVD */ 6769 } 6770 } 6771 } 6772 /* update pointers information */ 6773 if (valid_constraints) { 6774 constraints_n[total_counts_cc] = valid_constraints; 6775 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6776 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6777 /* set change_of_basis flag */ 6778 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6779 total_counts_cc++; 6780 } 6781 } 6782 /* free workspace */ 6783 if (!skip_lapack) { 6784 PetscCall(PetscFree(work)); 6785 #if defined(PETSC_USE_COMPLEX) 6786 PetscCall(PetscFree(rwork)); 6787 #endif 6788 PetscCall(PetscFree(singular_vals)); 6789 PetscCall(PetscFree(correlation_mat)); 6790 PetscCall(PetscFree(temp_basis)); 6791 } 6792 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6793 PetscCall(PetscFree(localnearnullsp)); 6794 /* free index sets of faces, edges and vertices */ 6795 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6796 } else { 6797 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6798 6799 total_counts = 0; 6800 n_vertices = 0; 6801 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6802 max_constraints = 0; 6803 total_counts_cc = 0; 6804 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6805 total_counts += pcbddc->adaptive_constraints_n[i]; 6806 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6807 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6808 } 6809 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6810 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6811 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6812 constraints_data = pcbddc->adaptive_constraints_data; 6813 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6814 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6815 total_counts_cc = 0; 6816 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6817 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6818 } 6819 6820 max_size_of_constraint = 0; 6821 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6822 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6823 /* Change of basis */ 6824 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6825 if (pcbddc->use_change_of_basis) { 6826 for (i = 0; i < sub_schurs->n_subs; i++) { 6827 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6828 } 6829 } 6830 } 6831 pcbddc->local_primal_size = total_counts; 6832 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6833 6834 /* map constraints_idxs in boundary numbering */ 6835 if (pcbddc->use_change_of_basis) { 6836 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6837 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6838 } 6839 6840 /* Create constraint matrix */ 6841 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6842 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6843 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6844 6845 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6846 /* determine if a QR strategy is needed for change of basis */ 6847 qr_needed = pcbddc->use_qr_single; 6848 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6849 total_primal_vertices = 0; 6850 pcbddc->local_primal_size_cc = 0; 6851 for (i = 0; i < total_counts_cc; i++) { 6852 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6853 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6854 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6855 pcbddc->local_primal_size_cc += 1; 6856 } else if (PetscBTLookup(change_basis, i)) { 6857 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6858 pcbddc->local_primal_size_cc += constraints_n[i]; 6859 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6860 PetscCall(PetscBTSet(qr_needed_idx, i)); 6861 qr_needed = PETSC_TRUE; 6862 } 6863 } else { 6864 pcbddc->local_primal_size_cc += 1; 6865 } 6866 } 6867 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6868 pcbddc->n_vertices = total_primal_vertices; 6869 /* permute indices in order to have a sorted set of vertices */ 6870 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6871 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6872 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6873 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6874 6875 /* nonzero structure of constraint matrix */ 6876 /* and get reference dof for local constraints */ 6877 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6878 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6879 6880 j = total_primal_vertices; 6881 total_counts = total_primal_vertices; 6882 cum = total_primal_vertices; 6883 for (i = n_vertices; i < total_counts_cc; i++) { 6884 if (!PetscBTLookup(change_basis, i)) { 6885 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6886 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6887 cum++; 6888 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6889 for (k = 0; k < constraints_n[i]; k++) { 6890 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6891 nnz[j + k] = size_of_constraint; 6892 } 6893 j += constraints_n[i]; 6894 } 6895 } 6896 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6897 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6898 PetscCall(PetscFree(nnz)); 6899 6900 /* set values in constraint matrix */ 6901 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 6902 total_counts = total_primal_vertices; 6903 for (i = n_vertices; i < total_counts_cc; i++) { 6904 if (!PetscBTLookup(change_basis, i)) { 6905 PetscInt *cols; 6906 6907 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6908 cols = constraints_idxs + constraints_idxs_ptr[i]; 6909 for (k = 0; k < constraints_n[i]; k++) { 6910 PetscInt row = total_counts + k; 6911 PetscScalar *vals; 6912 6913 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6914 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6915 } 6916 total_counts += constraints_n[i]; 6917 } 6918 } 6919 /* assembling */ 6920 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6921 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6922 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6923 6924 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6925 if (pcbddc->use_change_of_basis) { 6926 /* dual and primal dofs on a single cc */ 6927 PetscInt dual_dofs, primal_dofs; 6928 /* working stuff for GEQRF */ 6929 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6930 PetscBLASInt lqr_work; 6931 /* working stuff for UNGQR */ 6932 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6933 PetscBLASInt lgqr_work; 6934 /* working stuff for TRTRS */ 6935 PetscScalar *trs_rhs = NULL; 6936 PetscBLASInt Blas_NRHS; 6937 /* pointers for values insertion into change of basis matrix */ 6938 PetscInt *start_rows, *start_cols; 6939 PetscScalar *start_vals; 6940 /* working stuff for values insertion */ 6941 PetscBT is_primal; 6942 PetscInt *aux_primal_numbering_B; 6943 /* matrix sizes */ 6944 PetscInt global_size, local_size; 6945 /* temporary change of basis */ 6946 Mat localChangeOfBasisMatrix; 6947 /* extra space for debugging */ 6948 PetscScalar *dbg_work = NULL; 6949 6950 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 6951 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 6952 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 6953 /* nonzeros for local mat */ 6954 PetscCall(PetscMalloc1(pcis->n, &nnz)); 6955 if (!pcbddc->benign_change || pcbddc->fake_change) { 6956 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 6957 } else { 6958 const PetscInt *ii; 6959 PetscInt n; 6960 PetscBool flg_row; 6961 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6962 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 6963 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6964 } 6965 for (i = n_vertices; i < total_counts_cc; i++) { 6966 if (PetscBTLookup(change_basis, i)) { 6967 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6968 if (PetscBTLookup(qr_needed_idx, i)) { 6969 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 6970 } else { 6971 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6972 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 6973 } 6974 } 6975 } 6976 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 6977 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6978 PetscCall(PetscFree(nnz)); 6979 /* Set interior change in the matrix */ 6980 if (!pcbddc->benign_change || pcbddc->fake_change) { 6981 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 6982 } else { 6983 const PetscInt *ii, *jj; 6984 PetscScalar *aa; 6985 PetscInt n; 6986 PetscBool flg_row; 6987 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6988 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 6989 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 6990 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 6991 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6992 } 6993 6994 if (pcbddc->dbg_flag) { 6995 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6996 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 6997 } 6998 6999 /* Now we loop on the constraints which need a change of basis */ 7000 /* 7001 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 7002 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 7003 7004 Basic blocks of change of basis matrix T computed: 7005 7006 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 7007 7008 | 1 0 ... 0 s_1/S | 7009 | 0 1 ... 0 s_2/S | 7010 | ... | 7011 | 0 ... 1 s_{n-1}/S | 7012 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 7013 7014 with S = \sum_{i=1}^n s_i^2 7015 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 7016 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 7017 7018 - QR decomposition of constraints otherwise 7019 */ 7020 if (qr_needed && max_size_of_constraint) { 7021 /* space to store Q */ 7022 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 7023 /* array to store scaling factors for reflectors */ 7024 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 7025 /* first we issue queries for optimal work */ 7026 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7027 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 7028 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7029 lqr_work = -1; 7030 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 7031 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7032 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 7033 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 7034 lgqr_work = -1; 7035 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7036 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 7037 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 7038 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7039 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 7040 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 7041 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7042 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 7043 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 7044 /* array to store rhs and solution of triangular solver */ 7045 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 7046 /* allocating workspace for check */ 7047 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 7048 } 7049 /* array to store whether a node is primal or not */ 7050 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 7051 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 7052 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 7053 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 7054 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 7055 PetscCall(PetscFree(aux_primal_numbering_B)); 7056 7057 /* loop on constraints and see whether or not they need a change of basis and compute it */ 7058 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 7059 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 7060 if (PetscBTLookup(change_basis, total_counts)) { 7061 /* get constraint info */ 7062 primal_dofs = constraints_n[total_counts]; 7063 dual_dofs = size_of_constraint - primal_dofs; 7064 7065 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 7066 7067 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 7068 7069 /* copy quadrature constraints for change of basis check */ 7070 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7071 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 7072 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7073 7074 /* compute QR decomposition of constraints */ 7075 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7076 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7077 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7078 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7079 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 7080 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7081 PetscCall(PetscFPTrapPop()); 7082 7083 /* explicitly compute R^-T */ 7084 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 7085 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 7086 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7087 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 7088 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7089 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7090 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7091 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 7092 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr); 7093 PetscCall(PetscFPTrapPop()); 7094 7095 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 7096 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7097 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7098 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7099 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7100 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7101 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 7102 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7103 PetscCall(PetscFPTrapPop()); 7104 7105 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 7106 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 7107 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 7108 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7109 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7110 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7111 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7112 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7113 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 7114 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7115 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 7116 PetscCall(PetscFPTrapPop()); 7117 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7118 7119 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 7120 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 7121 /* insert cols for primal dofs */ 7122 for (j = 0; j < primal_dofs; j++) { 7123 start_vals = &qr_basis[j * size_of_constraint]; 7124 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7125 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7126 } 7127 /* insert cols for dual dofs */ 7128 for (j = 0, k = 0; j < dual_dofs; k++) { 7129 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 7130 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 7131 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7132 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7133 j++; 7134 } 7135 } 7136 7137 /* check change of basis */ 7138 if (pcbddc->dbg_flag) { 7139 PetscInt ii, jj; 7140 PetscBool valid_qr = PETSC_TRUE; 7141 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 7142 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7143 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 7144 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7145 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 7146 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 7147 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7148 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 7149 PetscCall(PetscFPTrapPop()); 7150 for (jj = 0; jj < size_of_constraint; jj++) { 7151 for (ii = 0; ii < primal_dofs; ii++) { 7152 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 7153 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 7154 } 7155 } 7156 if (!valid_qr) { 7157 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 7158 for (jj = 0; jj < size_of_constraint; jj++) { 7159 for (ii = 0; ii < primal_dofs; ii++) { 7160 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 7161 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7162 } 7163 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 7164 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7165 } 7166 } 7167 } 7168 } else { 7169 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 7170 } 7171 } 7172 } else { /* simple transformation block */ 7173 PetscInt row, col; 7174 PetscScalar val, norm; 7175 7176 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7177 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 7178 for (j = 0; j < size_of_constraint; j++) { 7179 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 7180 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7181 if (!PetscBTLookup(is_primal, row_B)) { 7182 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 7183 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 7184 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 7185 } else { 7186 for (k = 0; k < size_of_constraint; k++) { 7187 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7188 if (row != col) { 7189 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 7190 } else { 7191 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 7192 } 7193 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 7194 } 7195 } 7196 } 7197 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 7198 } 7199 } else { 7200 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 7201 } 7202 } 7203 7204 /* free workspace */ 7205 if (qr_needed) { 7206 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 7207 PetscCall(PetscFree(trs_rhs)); 7208 PetscCall(PetscFree(qr_tau)); 7209 PetscCall(PetscFree(qr_work)); 7210 PetscCall(PetscFree(gqr_work)); 7211 PetscCall(PetscFree(qr_basis)); 7212 } 7213 PetscCall(PetscBTDestroy(&is_primal)); 7214 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7215 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7216 7217 /* assembling of global change of variable */ 7218 if (!pcbddc->fake_change) { 7219 Mat tmat; 7220 PetscInt bs; 7221 7222 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 7223 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 7224 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 7225 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 7226 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 7227 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 7228 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix)); 7229 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ)); 7230 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 7231 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs)); 7232 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size)); 7233 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE)); 7234 PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 7235 PetscCall(MatDestroy(&tmat)); 7236 PetscCall(VecSet(pcis->vec1_global, 0.0)); 7237 PetscCall(VecSet(pcis->vec1_N, 1.0)); 7238 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7239 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7240 PetscCall(VecReciprocal(pcis->vec1_global)); 7241 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 7242 7243 /* check */ 7244 if (pcbddc->dbg_flag) { 7245 PetscReal error; 7246 Vec x, x_change; 7247 7248 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 7249 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 7250 PetscCall(VecSetRandom(x, NULL)); 7251 PetscCall(VecCopy(x, pcis->vec1_global)); 7252 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7253 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7254 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 7255 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7256 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7257 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 7258 PetscCall(VecAXPY(x, -1.0, x_change)); 7259 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 7260 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 7261 PetscCall(VecDestroy(&x)); 7262 PetscCall(VecDestroy(&x_change)); 7263 } 7264 /* adapt sub_schurs computed (if any) */ 7265 if (pcbddc->use_deluxe_scaling) { 7266 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 7267 7268 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 7269 if (sub_schurs && sub_schurs->S_Ej_all) { 7270 Mat S_new, tmat; 7271 IS is_all_N, is_V_Sall = NULL; 7272 7273 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 7274 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 7275 if (pcbddc->deluxe_zerorows) { 7276 ISLocalToGlobalMapping NtoSall; 7277 IS is_V; 7278 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 7279 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 7280 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 7281 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 7282 PetscCall(ISDestroy(&is_V)); 7283 } 7284 PetscCall(ISDestroy(&is_all_N)); 7285 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7286 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 7287 PetscCall(PetscObjectReference((PetscObject)S_new)); 7288 if (pcbddc->deluxe_zerorows) { 7289 const PetscScalar *array; 7290 const PetscInt *idxs_V, *idxs_all; 7291 PetscInt i, n_V; 7292 7293 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7294 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 7295 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 7296 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 7297 PetscCall(VecGetArrayRead(pcis->D, &array)); 7298 for (i = 0; i < n_V; i++) { 7299 PetscScalar val; 7300 PetscInt idx; 7301 7302 idx = idxs_V[i]; 7303 val = array[idxs_all[idxs_V[i]]]; 7304 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 7305 } 7306 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 7307 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 7308 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 7309 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 7310 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 7311 } 7312 sub_schurs->S_Ej_all = S_new; 7313 PetscCall(MatDestroy(&S_new)); 7314 if (sub_schurs->sum_S_Ej_all) { 7315 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7316 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7317 PetscCall(PetscObjectReference((PetscObject)S_new)); 7318 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7319 sub_schurs->sum_S_Ej_all = S_new; 7320 PetscCall(MatDestroy(&S_new)); 7321 } 7322 PetscCall(ISDestroy(&is_V_Sall)); 7323 PetscCall(MatDestroy(&tmat)); 7324 } 7325 /* destroy any change of basis context in sub_schurs */ 7326 if (sub_schurs && sub_schurs->change) { 7327 PetscInt i; 7328 7329 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 7330 PetscCall(PetscFree(sub_schurs->change)); 7331 } 7332 } 7333 if (pcbddc->switch_static) { /* need to save the local change */ 7334 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7335 } else { 7336 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7337 } 7338 /* determine if any process has changed the pressures locally */ 7339 pcbddc->change_interior = pcbddc->benign_have_null; 7340 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7341 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7342 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7343 pcbddc->use_qr_single = qr_needed; 7344 } 7345 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7346 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7347 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7348 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7349 } else { 7350 Mat benign_global = NULL; 7351 if (pcbddc->benign_have_null) { 7352 Mat M; 7353 7354 pcbddc->change_interior = PETSC_TRUE; 7355 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 7356 PetscCall(VecReciprocal(pcis->vec1_N)); 7357 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 7358 if (pcbddc->benign_change) { 7359 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 7360 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 7361 } else { 7362 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 7363 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 7364 } 7365 PetscCall(MatISSetLocalMat(benign_global, M)); 7366 PetscCall(MatDestroy(&M)); 7367 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 7368 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 7369 } 7370 if (pcbddc->user_ChangeOfBasisMatrix) { 7371 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix)); 7372 PetscCall(MatDestroy(&benign_global)); 7373 } else if (pcbddc->benign_have_null) { 7374 pcbddc->ChangeOfBasisMatrix = benign_global; 7375 } 7376 } 7377 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7378 IS is_global; 7379 const PetscInt *gidxs; 7380 7381 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 7382 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 7383 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 7384 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 7385 PetscCall(ISDestroy(&is_global)); 7386 } 7387 } 7388 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 7389 7390 if (!pcbddc->fake_change) { 7391 /* add pressure dofs to set of primal nodes for numbering purposes */ 7392 for (i = 0; i < pcbddc->benign_n; i++) { 7393 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7394 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7395 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7396 pcbddc->local_primal_size_cc++; 7397 pcbddc->local_primal_size++; 7398 } 7399 7400 /* check if a new primal space has been introduced (also take into account benign trick) */ 7401 pcbddc->new_primal_space_local = PETSC_TRUE; 7402 if (olocal_primal_size == pcbddc->local_primal_size) { 7403 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7404 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7405 if (!pcbddc->new_primal_space_local) { 7406 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7407 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7408 } 7409 } 7410 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7411 PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7412 } 7413 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 7414 7415 /* flush dbg viewer */ 7416 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7417 7418 /* free workspace */ 7419 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7420 PetscCall(PetscBTDestroy(&change_basis)); 7421 if (!pcbddc->adaptive_selection) { 7422 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 7423 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 7424 } else { 7425 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 7426 PetscCall(PetscFree(constraints_n)); 7427 PetscCall(PetscFree(constraints_idxs_B)); 7428 } 7429 PetscFunctionReturn(PETSC_SUCCESS); 7430 } 7431 7432 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7433 { 7434 ISLocalToGlobalMapping map; 7435 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7436 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 7437 PetscInt i, N; 7438 PetscBool rcsr = PETSC_FALSE; 7439 7440 PetscFunctionBegin; 7441 if (pcbddc->recompute_topography) { 7442 pcbddc->graphanalyzed = PETSC_FALSE; 7443 /* Reset previously computed graph */ 7444 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7445 /* Init local Graph struct */ 7446 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 7447 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 7448 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 7449 7450 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 7451 /* Check validity of the csr graph passed in by the user */ 7452 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 7453 pcbddc->mat_graph->nvtxs); 7454 7455 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7456 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7457 PetscInt *xadj, *adjncy; 7458 PetscInt nvtxs; 7459 PetscBool flg_row; 7460 Mat A; 7461 7462 PetscCall(PetscObjectReference((PetscObject)matis->A)); 7463 A = matis->A; 7464 for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) { 7465 Mat AtA; 7466 7467 PetscCall(MatProductCreate(A, A, NULL, &AtA)); 7468 PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_")); 7469 PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB)); 7470 PetscCall(MatProductSetFromOptions(AtA)); 7471 PetscCall(MatProductSymbolic(AtA)); 7472 PetscCall(MatProductClear(AtA)); 7473 /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */ 7474 AtA->assembled = PETSC_TRUE; 7475 PetscCall(MatDestroy(&A)); 7476 A = AtA; 7477 } 7478 PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7479 if (flg_row) { 7480 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 7481 pcbddc->computed_rowadj = PETSC_TRUE; 7482 PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7483 rcsr = PETSC_TRUE; 7484 } 7485 PetscCall(MatDestroy(&A)); 7486 } 7487 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7488 7489 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7490 PetscReal *lcoords; 7491 PetscInt n; 7492 MPI_Datatype dimrealtype; 7493 PetscMPIInt cdimi; 7494 7495 /* TODO: support for blocked */ 7496 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 7497 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7498 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 7499 PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi)); 7500 PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype)); 7501 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7502 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7503 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7504 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7505 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7506 7507 pcbddc->mat_graph->coords = lcoords; 7508 pcbddc->mat_graph->cloc = PETSC_TRUE; 7509 pcbddc->mat_graph->cnloc = n; 7510 } 7511 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 7512 pcbddc->mat_graph->nvtxs); 7513 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7514 7515 /* attach info on disconnected subdomains if present */ 7516 if (pcbddc->n_local_subs) { 7517 PetscInt *local_subs, n, totn; 7518 7519 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7520 PetscCall(PetscMalloc1(n, &local_subs)); 7521 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 7522 for (i = 0; i < pcbddc->n_local_subs; i++) { 7523 const PetscInt *idxs; 7524 PetscInt nl, j; 7525 7526 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 7527 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 7528 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 7529 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 7530 } 7531 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 7532 pcbddc->mat_graph->n_local_subs = totn + 1; 7533 pcbddc->mat_graph->local_subs = local_subs; 7534 } 7535 7536 /* Setup of Graph */ 7537 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 7538 } 7539 7540 if (!pcbddc->graphanalyzed) { 7541 /* Graph's connected components analysis */ 7542 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7543 pcbddc->graphanalyzed = PETSC_TRUE; 7544 pcbddc->corner_selected = pcbddc->corner_selection; 7545 } 7546 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7547 PetscFunctionReturn(PETSC_SUCCESS); 7548 } 7549 7550 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7551 { 7552 PetscInt i, j, n; 7553 PetscScalar *alphas; 7554 PetscReal norm, *onorms; 7555 7556 PetscFunctionBegin; 7557 n = *nio; 7558 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 7559 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 7560 PetscCall(VecNormalize(vecs[0], &norm)); 7561 if (norm < PETSC_SMALL) { 7562 onorms[0] = 0.0; 7563 PetscCall(VecSet(vecs[0], 0.0)); 7564 } else { 7565 onorms[0] = norm; 7566 } 7567 7568 for (i = 1; i < n; i++) { 7569 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 7570 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 7571 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 7572 PetscCall(VecNormalize(vecs[i], &norm)); 7573 if (norm < PETSC_SMALL) { 7574 onorms[i] = 0.0; 7575 PetscCall(VecSet(vecs[i], 0.0)); 7576 } else { 7577 onorms[i] = norm; 7578 } 7579 } 7580 /* push nonzero vectors at the beginning */ 7581 for (i = 0; i < n; i++) { 7582 if (onorms[i] == 0.0) { 7583 for (j = i + 1; j < n; j++) { 7584 if (onorms[j] != 0.0) { 7585 PetscCall(VecCopy(vecs[j], vecs[i])); 7586 onorms[i] = onorms[j]; 7587 onorms[j] = 0.0; 7588 break; 7589 } 7590 } 7591 } 7592 } 7593 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7594 PetscCall(PetscFree2(alphas, onorms)); 7595 PetscFunctionReturn(PETSC_SUCCESS); 7596 } 7597 7598 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 7599 { 7600 ISLocalToGlobalMapping mapping; 7601 Mat A; 7602 PetscInt n_neighs, *neighs, *n_shared, **shared; 7603 PetscMPIInt size, rank, color; 7604 PetscInt *xadj, *adjncy; 7605 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 7606 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 7607 PetscInt void_procs, *procs_candidates = NULL; 7608 PetscInt xadj_count, *count; 7609 PetscBool ismatis, use_vwgt = PETSC_FALSE; 7610 PetscSubcomm psubcomm; 7611 MPI_Comm subcomm; 7612 7613 PetscFunctionBegin; 7614 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7615 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7616 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7617 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7618 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7619 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7620 7621 if (have_void) *have_void = PETSC_FALSE; 7622 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7623 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7624 PetscCall(MatISGetLocalMat(mat, &A)); 7625 PetscCall(MatGetLocalSize(A, &n, NULL)); 7626 im_active = !!n; 7627 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7628 void_procs = size - active_procs; 7629 /* get ranks of non-active processes in mat communicator */ 7630 if (void_procs) { 7631 PetscInt ncand; 7632 7633 if (have_void) *have_void = PETSC_TRUE; 7634 PetscCall(PetscMalloc1(size, &procs_candidates)); 7635 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7636 for (i = 0, ncand = 0; i < size; i++) { 7637 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7638 } 7639 /* force n_subdomains to be not greater that the number of non-active processes */ 7640 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7641 } 7642 7643 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7644 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7645 PetscCall(MatGetSize(mat, &N, NULL)); 7646 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7647 PetscInt issize, isidx, dest; 7648 if (*n_subdomains == 1) dest = 0; 7649 else dest = rank; 7650 if (im_active) { 7651 issize = 1; 7652 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7653 isidx = procs_candidates[dest]; 7654 } else { 7655 isidx = dest; 7656 } 7657 } else { 7658 issize = 0; 7659 isidx = -1; 7660 } 7661 if (*n_subdomains != 1) *n_subdomains = active_procs; 7662 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7663 PetscCall(PetscFree(procs_candidates)); 7664 PetscFunctionReturn(PETSC_SUCCESS); 7665 } 7666 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL)); 7667 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL)); 7668 threshold = PetscMax(threshold, 2); 7669 7670 /* Get info on mapping */ 7671 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7672 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7673 7674 /* build local CSR graph of subdomains' connectivity */ 7675 PetscCall(PetscMalloc1(2, &xadj)); 7676 xadj[0] = 0; 7677 xadj[1] = PetscMax(n_neighs - 1, 0); 7678 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7679 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7680 PetscCall(PetscCalloc1(n, &count)); 7681 for (i = 1; i < n_neighs; i++) 7682 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7683 7684 xadj_count = 0; 7685 for (i = 1; i < n_neighs; i++) { 7686 for (j = 0; j < n_shared[i]; j++) { 7687 if (count[shared[i][j]] < threshold) { 7688 adjncy[xadj_count] = neighs[i]; 7689 adjncy_wgt[xadj_count] = n_shared[i]; 7690 xadj_count++; 7691 break; 7692 } 7693 } 7694 } 7695 xadj[1] = xadj_count; 7696 PetscCall(PetscFree(count)); 7697 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7698 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7699 7700 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7701 7702 /* Restrict work on active processes only */ 7703 PetscCall(PetscMPIIntCast(im_active, &color)); 7704 if (void_procs) { 7705 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7706 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7707 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7708 subcomm = PetscSubcommChild(psubcomm); 7709 } else { 7710 psubcomm = NULL; 7711 subcomm = PetscObjectComm((PetscObject)mat); 7712 } 7713 7714 v_wgt = NULL; 7715 if (!color) { 7716 PetscCall(PetscFree(xadj)); 7717 PetscCall(PetscFree(adjncy)); 7718 PetscCall(PetscFree(adjncy_wgt)); 7719 } else { 7720 Mat subdomain_adj; 7721 IS new_ranks, new_ranks_contig; 7722 MatPartitioning partitioner; 7723 PetscInt rstart, rend; 7724 PetscMPIInt irstart = 0, irend = 0; 7725 PetscInt *is_indices, *oldranks; 7726 PetscMPIInt size; 7727 PetscBool aggregate; 7728 7729 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7730 if (void_procs) { 7731 PetscInt prank = rank; 7732 PetscCall(PetscMalloc1(size, &oldranks)); 7733 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7734 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7735 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7736 } else { 7737 oldranks = NULL; 7738 } 7739 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7740 if (aggregate) { /* TODO: all this part could be made more efficient */ 7741 PetscInt lrows, row, ncols, *cols; 7742 PetscMPIInt nrank; 7743 PetscScalar *vals; 7744 7745 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7746 lrows = 0; 7747 if (nrank < redprocs) { 7748 lrows = size / redprocs; 7749 if (nrank < size % redprocs) lrows++; 7750 } 7751 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7752 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7753 PetscCall(PetscMPIIntCast(rstart, &irstart)); 7754 PetscCall(PetscMPIIntCast(rend, &irend)); 7755 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7756 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7757 row = nrank; 7758 ncols = xadj[1] - xadj[0]; 7759 cols = adjncy; 7760 PetscCall(PetscMalloc1(ncols, &vals)); 7761 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7762 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7763 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7764 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7765 PetscCall(PetscFree(xadj)); 7766 PetscCall(PetscFree(adjncy)); 7767 PetscCall(PetscFree(adjncy_wgt)); 7768 PetscCall(PetscFree(vals)); 7769 if (use_vwgt) { 7770 Vec v; 7771 const PetscScalar *array; 7772 PetscInt nl; 7773 7774 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7775 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7776 PetscCall(VecAssemblyBegin(v)); 7777 PetscCall(VecAssemblyEnd(v)); 7778 PetscCall(VecGetLocalSize(v, &nl)); 7779 PetscCall(VecGetArrayRead(v, &array)); 7780 PetscCall(PetscMalloc1(nl, &v_wgt)); 7781 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7782 PetscCall(VecRestoreArrayRead(v, &array)); 7783 PetscCall(VecDestroy(&v)); 7784 } 7785 } else { 7786 PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7787 if (use_vwgt) { 7788 PetscCall(PetscMalloc1(1, &v_wgt)); 7789 v_wgt[0] = n; 7790 } 7791 } 7792 /* PetscCall(MatView(subdomain_adj,0)); */ 7793 7794 /* Partition */ 7795 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7796 #if defined(PETSC_HAVE_PTSCOTCH) 7797 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7798 #elif defined(PETSC_HAVE_PARMETIS) 7799 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7800 #else 7801 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7802 #endif 7803 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7804 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7805 *n_subdomains = PetscMin(size, *n_subdomains); 7806 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7807 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7808 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7809 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7810 7811 /* renumber new_ranks to avoid "holes" in new set of processors */ 7812 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7813 PetscCall(ISDestroy(&new_ranks)); 7814 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7815 if (!aggregate) { 7816 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7817 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7818 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7819 } else if (oldranks) { 7820 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7821 } else { 7822 ranks_send_to_idx[0] = is_indices[0]; 7823 } 7824 } else { 7825 PetscInt idx = 0; 7826 PetscMPIInt tag; 7827 MPI_Request *reqs; 7828 7829 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7830 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7831 for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7832 PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7833 PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE)); 7834 PetscCall(PetscFree(reqs)); 7835 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7836 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7837 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7838 } else if (oldranks) { 7839 ranks_send_to_idx[0] = oldranks[idx]; 7840 } else { 7841 ranks_send_to_idx[0] = idx; 7842 } 7843 } 7844 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7845 /* clean up */ 7846 PetscCall(PetscFree(oldranks)); 7847 PetscCall(ISDestroy(&new_ranks_contig)); 7848 PetscCall(MatDestroy(&subdomain_adj)); 7849 PetscCall(MatPartitioningDestroy(&partitioner)); 7850 } 7851 PetscCall(PetscSubcommDestroy(&psubcomm)); 7852 PetscCall(PetscFree(procs_candidates)); 7853 7854 /* assemble parallel IS for sends */ 7855 i = 1; 7856 if (!color) i = 0; 7857 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7858 PetscFunctionReturn(PETSC_SUCCESS); 7859 } 7860 7861 typedef enum { 7862 MATDENSE_PRIVATE = 0, 7863 MATAIJ_PRIVATE, 7864 MATBAIJ_PRIVATE, 7865 MATSBAIJ_PRIVATE 7866 } MatTypePrivate; 7867 7868 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7869 { 7870 Mat local_mat; 7871 IS is_sends_internal; 7872 PetscInt rows, cols, new_local_rows; 7873 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7874 PetscBool ismatis, isdense, newisdense, destroy_mat; 7875 ISLocalToGlobalMapping l2gmap; 7876 PetscInt *l2gmap_indices; 7877 const PetscInt *is_indices; 7878 MatType new_local_type; 7879 /* buffers */ 7880 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7881 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7882 PetscInt *recv_buffer_idxs_local; 7883 PetscScalar *ptr_vals, *recv_buffer_vals; 7884 const PetscScalar *send_buffer_vals; 7885 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7886 /* MPI */ 7887 MPI_Comm comm, comm_n; 7888 PetscSubcomm subcomm; 7889 PetscMPIInt n_sends, n_recvs, size; 7890 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7891 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7892 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7893 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7894 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7895 7896 PetscFunctionBegin; 7897 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7898 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7899 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7900 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7901 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7902 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7903 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7904 PetscValidLogicalCollectiveInt(mat, nis, 8); 7905 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7906 if (nvecs) { 7907 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7908 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7909 } 7910 /* further checks */ 7911 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7912 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7913 /* XXX hack for multi_element */ 7914 if (!isdense) PetscCall(MatConvert(local_mat, MATDENSE, MAT_INPLACE_MATRIX, &local_mat)); 7915 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7916 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7917 7918 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7919 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7920 if (reuse && *mat_n) { 7921 PetscInt mrows, mcols, mnrows, mncols; 7922 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7923 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7924 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7925 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7926 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7927 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7928 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7929 } 7930 PetscCall(MatGetBlockSize(local_mat, &bs)); 7931 PetscValidLogicalCollectiveInt(mat, bs, 1); 7932 7933 /* prepare IS for sending if not provided */ 7934 if (!is_sends) { 7935 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7936 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7937 } else { 7938 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7939 is_sends_internal = is_sends; 7940 } 7941 7942 /* get comm */ 7943 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7944 7945 /* compute number of sends */ 7946 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7947 PetscCall(PetscMPIIntCast(i, &n_sends)); 7948 7949 /* compute number of receives */ 7950 PetscCallMPI(MPI_Comm_size(comm, &size)); 7951 PetscCall(PetscMalloc1(size, &iflags)); 7952 PetscCall(PetscArrayzero(iflags, size)); 7953 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 7954 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 7955 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 7956 PetscCall(PetscFree(iflags)); 7957 7958 /* restrict comm if requested */ 7959 subcomm = NULL; 7960 destroy_mat = PETSC_FALSE; 7961 if (restrict_comm) { 7962 PetscMPIInt color, subcommsize; 7963 7964 color = 0; 7965 if (restrict_full) { 7966 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7967 } else { 7968 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7969 } 7970 PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 7971 subcommsize = size - subcommsize; 7972 /* check if reuse has been requested */ 7973 if (reuse) { 7974 if (*mat_n) { 7975 PetscMPIInt subcommsize2; 7976 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 7977 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 7978 comm_n = PetscObjectComm((PetscObject)*mat_n); 7979 } else { 7980 comm_n = PETSC_COMM_SELF; 7981 } 7982 } else { /* MAT_INITIAL_MATRIX */ 7983 PetscMPIInt rank; 7984 7985 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7986 PetscCall(PetscSubcommCreate(comm, &subcomm)); 7987 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 7988 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 7989 comm_n = PetscSubcommChild(subcomm); 7990 } 7991 /* flag to destroy *mat_n if not significative */ 7992 if (color) destroy_mat = PETSC_TRUE; 7993 } else { 7994 comm_n = comm; 7995 } 7996 7997 /* prepare send/receive buffers */ 7998 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 7999 PetscCall(PetscArrayzero(ilengths_idxs, size)); 8000 PetscCall(PetscMalloc1(size, &ilengths_vals)); 8001 PetscCall(PetscArrayzero(ilengths_vals, size)); 8002 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 8003 8004 /* Get data from local matrices */ 8005 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 8006 /* TODO: See below some guidelines on how to prepare the local buffers */ 8007 /* 8008 send_buffer_vals should contain the raw values of the local matrix 8009 send_buffer_idxs should contain: 8010 - MatType_PRIVATE type 8011 - PetscInt size_of_l2gmap 8012 - PetscInt global_row_indices[size_of_l2gmap] 8013 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 8014 */ 8015 { 8016 ISLocalToGlobalMapping mapping; 8017 8018 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 8019 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 8020 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 8021 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 8022 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 8023 send_buffer_idxs[1] = i; 8024 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 8025 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 8026 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 8027 PetscCall(PetscMPIIntCast(i, &len)); 8028 for (i = 0; i < n_sends; i++) { 8029 ilengths_vals[is_indices[i]] = len * len; 8030 ilengths_idxs[is_indices[i]] = len + 2; 8031 } 8032 } 8033 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 8034 /* additional is (if any) */ 8035 if (nis) { 8036 PetscMPIInt psum; 8037 PetscInt j; 8038 for (j = 0, psum = 0; j < nis; j++) { 8039 PetscInt plen; 8040 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8041 PetscCall(PetscMPIIntCast(plen, &len)); 8042 psum += len + 1; /* indices + length */ 8043 } 8044 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 8045 for (j = 0, psum = 0; j < nis; j++) { 8046 PetscInt plen; 8047 const PetscInt *is_array_idxs; 8048 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8049 send_buffer_idxs_is[psum] = plen; 8050 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 8051 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 8052 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 8053 psum += plen + 1; /* indices + length */ 8054 } 8055 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 8056 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 8057 } 8058 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8059 8060 buf_size_idxs = 0; 8061 buf_size_vals = 0; 8062 buf_size_idxs_is = 0; 8063 buf_size_vecs = 0; 8064 for (i = 0; i < n_recvs; i++) { 8065 buf_size_idxs += olengths_idxs[i]; 8066 buf_size_vals += olengths_vals[i]; 8067 if (nis) buf_size_idxs_is += olengths_idxs_is[i]; 8068 if (nvecs) buf_size_vecs += olengths_idxs[i]; 8069 } 8070 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 8071 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 8072 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 8073 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 8074 8075 /* get new tags for clean communications */ 8076 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 8077 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 8078 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 8079 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 8080 8081 /* allocate for requests */ 8082 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 8083 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 8084 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 8085 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 8086 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 8087 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 8088 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 8089 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 8090 8091 /* communications */ 8092 ptr_idxs = recv_buffer_idxs; 8093 ptr_vals = recv_buffer_vals; 8094 ptr_idxs_is = recv_buffer_idxs_is; 8095 ptr_vecs = recv_buffer_vecs; 8096 for (i = 0; i < n_recvs; i++) { 8097 PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i])); 8098 PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i])); 8099 ptr_idxs += olengths_idxs[i]; 8100 ptr_vals += olengths_vals[i]; 8101 if (nis) { 8102 PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i])); 8103 ptr_idxs_is += olengths_idxs_is[i]; 8104 } 8105 if (nvecs) { 8106 PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i])); 8107 ptr_vecs += olengths_idxs[i] - 2; 8108 } 8109 } 8110 for (i = 0; i < n_sends; i++) { 8111 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 8112 PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 8113 PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 8114 if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 8115 if (nvecs) { 8116 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8117 PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 8118 } 8119 } 8120 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 8121 PetscCall(ISDestroy(&is_sends_internal)); 8122 8123 /* assemble new l2g map */ 8124 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 8125 ptr_idxs = recv_buffer_idxs; 8126 new_local_rows = 0; 8127 for (i = 0; i < n_recvs; i++) { 8128 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8129 ptr_idxs += olengths_idxs[i]; 8130 } 8131 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 8132 ptr_idxs = recv_buffer_idxs; 8133 new_local_rows = 0; 8134 for (i = 0; i < n_recvs; i++) { 8135 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 8136 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8137 ptr_idxs += olengths_idxs[i]; 8138 } 8139 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 8140 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 8141 PetscCall(PetscFree(l2gmap_indices)); 8142 8143 /* infer new local matrix type from received local matrices type */ 8144 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 8145 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 8146 if (n_recvs) { 8147 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 8148 ptr_idxs = recv_buffer_idxs; 8149 for (i = 0; i < n_recvs; i++) { 8150 if ((PetscInt)new_local_type_private != *ptr_idxs) { 8151 new_local_type_private = MATAIJ_PRIVATE; 8152 break; 8153 } 8154 ptr_idxs += olengths_idxs[i]; 8155 } 8156 switch (new_local_type_private) { 8157 case MATDENSE_PRIVATE: 8158 new_local_type = MATSEQAIJ; 8159 bs = 1; 8160 break; 8161 case MATAIJ_PRIVATE: 8162 new_local_type = MATSEQAIJ; 8163 bs = 1; 8164 break; 8165 case MATBAIJ_PRIVATE: 8166 new_local_type = MATSEQBAIJ; 8167 break; 8168 case MATSBAIJ_PRIVATE: 8169 new_local_type = MATSEQSBAIJ; 8170 break; 8171 default: 8172 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 8173 } 8174 } else { /* by default, new_local_type is seqaij */ 8175 new_local_type = MATSEQAIJ; 8176 bs = 1; 8177 } 8178 8179 /* create MATIS object if needed */ 8180 if (!reuse) { 8181 PetscCall(MatGetSize(mat, &rows, &cols)); 8182 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8183 } else { 8184 /* it also destroys the local matrices */ 8185 if (*mat_n) { 8186 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 8187 } else { /* this is a fake object */ 8188 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8189 } 8190 } 8191 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 8192 PetscCall(MatSetType(local_mat, new_local_type)); 8193 8194 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 8195 8196 /* Global to local map of received indices */ 8197 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 8198 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 8199 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 8200 8201 /* restore attributes -> type of incoming data and its size */ 8202 buf_size_idxs = 0; 8203 for (i = 0; i < n_recvs; i++) { 8204 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 8205 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 8206 buf_size_idxs += olengths_idxs[i]; 8207 } 8208 PetscCall(PetscFree(recv_buffer_idxs)); 8209 8210 /* set preallocation */ 8211 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 8212 if (!newisdense) { 8213 PetscInt *new_local_nnz = NULL; 8214 8215 ptr_idxs = recv_buffer_idxs_local; 8216 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 8217 for (i = 0; i < n_recvs; i++) { 8218 PetscInt j; 8219 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 8220 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 8221 } else { 8222 /* TODO */ 8223 } 8224 ptr_idxs += olengths_idxs[i]; 8225 } 8226 if (new_local_nnz) { 8227 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 8228 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 8229 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 8230 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8231 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 8232 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8233 } else { 8234 PetscCall(MatSetUp(local_mat)); 8235 } 8236 PetscCall(PetscFree(new_local_nnz)); 8237 } else { 8238 PetscCall(MatSetUp(local_mat)); 8239 } 8240 8241 /* set values */ 8242 ptr_vals = recv_buffer_vals; 8243 ptr_idxs = recv_buffer_idxs_local; 8244 for (i = 0; i < n_recvs; i++) { 8245 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 8246 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 8247 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 8248 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 8249 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 8250 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 8251 } else { 8252 /* TODO */ 8253 } 8254 ptr_idxs += olengths_idxs[i]; 8255 ptr_vals += olengths_vals[i]; 8256 } 8257 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 8258 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 8259 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 8260 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 8261 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 8262 PetscCall(PetscFree(recv_buffer_vals)); 8263 8264 #if 0 8265 if (!restrict_comm) { /* check */ 8266 Vec lvec,rvec; 8267 PetscReal infty_error; 8268 8269 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 8270 PetscCall(VecSetRandom(rvec,NULL)); 8271 PetscCall(MatMult(mat,rvec,lvec)); 8272 PetscCall(VecScale(lvec,-1.0)); 8273 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 8274 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 8275 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 8276 PetscCall(VecDestroy(&rvec)); 8277 PetscCall(VecDestroy(&lvec)); 8278 } 8279 #endif 8280 8281 /* assemble new additional is (if any) */ 8282 if (nis) { 8283 PetscInt **temp_idxs, *count_is, j, psum; 8284 8285 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 8286 PetscCall(PetscCalloc1(nis, &count_is)); 8287 ptr_idxs = recv_buffer_idxs_is; 8288 psum = 0; 8289 for (i = 0; i < n_recvs; i++) { 8290 for (j = 0; j < nis; j++) { 8291 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8292 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8293 psum += plen; 8294 ptr_idxs += plen + 1; /* shift pointer to received data */ 8295 } 8296 } 8297 PetscCall(PetscMalloc1(nis, &temp_idxs)); 8298 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 8299 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]); 8300 PetscCall(PetscArrayzero(count_is, nis)); 8301 ptr_idxs = recv_buffer_idxs_is; 8302 for (i = 0; i < n_recvs; i++) { 8303 for (j = 0; j < nis; j++) { 8304 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8305 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 8306 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8307 ptr_idxs += plen + 1; /* shift pointer to received data */ 8308 } 8309 } 8310 for (i = 0; i < nis; i++) { 8311 PetscCall(ISDestroy(&isarray[i])); 8312 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 8313 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 8314 } 8315 PetscCall(PetscFree(count_is)); 8316 PetscCall(PetscFree(temp_idxs[0])); 8317 PetscCall(PetscFree(temp_idxs)); 8318 } 8319 /* free workspace */ 8320 PetscCall(PetscFree(recv_buffer_idxs_is)); 8321 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 8322 PetscCall(PetscFree(send_buffer_idxs)); 8323 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 8324 if (isdense) { 8325 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8326 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 8327 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8328 } else { 8329 /* PetscCall(PetscFree(send_buffer_vals)); */ 8330 } 8331 if (nis) { 8332 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 8333 PetscCall(PetscFree(send_buffer_idxs_is)); 8334 } 8335 8336 if (nvecs) { 8337 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 8338 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 8339 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8340 PetscCall(VecDestroy(&nnsp_vec[0])); 8341 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 8342 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 8343 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 8344 /* set values */ 8345 ptr_vals = recv_buffer_vecs; 8346 ptr_idxs = recv_buffer_idxs_local; 8347 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8348 for (i = 0; i < n_recvs; i++) { 8349 PetscInt j; 8350 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 8351 ptr_idxs += olengths_idxs[i]; 8352 ptr_vals += olengths_idxs[i] - 2; 8353 } 8354 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8355 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8356 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8357 } 8358 8359 PetscCall(PetscFree(recv_buffer_vecs)); 8360 PetscCall(PetscFree(recv_buffer_idxs_local)); 8361 PetscCall(PetscFree(recv_req_idxs)); 8362 PetscCall(PetscFree(recv_req_vals)); 8363 PetscCall(PetscFree(recv_req_vecs)); 8364 PetscCall(PetscFree(recv_req_idxs_is)); 8365 PetscCall(PetscFree(send_req_idxs)); 8366 PetscCall(PetscFree(send_req_vals)); 8367 PetscCall(PetscFree(send_req_vecs)); 8368 PetscCall(PetscFree(send_req_idxs_is)); 8369 PetscCall(PetscFree(ilengths_vals)); 8370 PetscCall(PetscFree(ilengths_idxs)); 8371 PetscCall(PetscFree(olengths_vals)); 8372 PetscCall(PetscFree(olengths_idxs)); 8373 PetscCall(PetscFree(onodes)); 8374 if (nis) { 8375 PetscCall(PetscFree(ilengths_idxs_is)); 8376 PetscCall(PetscFree(olengths_idxs_is)); 8377 PetscCall(PetscFree(onodes_is)); 8378 } 8379 PetscCall(PetscSubcommDestroy(&subcomm)); 8380 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8381 PetscCall(MatDestroy(mat_n)); 8382 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 8383 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8384 PetscCall(VecDestroy(&nnsp_vec[0])); 8385 } 8386 *mat_n = NULL; 8387 } 8388 PetscFunctionReturn(PETSC_SUCCESS); 8389 } 8390 8391 /* temporary hack into ksp private data structure */ 8392 #include <petsc/private/kspimpl.h> 8393 8394 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat) 8395 { 8396 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8397 PC_IS *pcis = (PC_IS *)pc->data; 8398 PCBDDCGraph graph = pcbddc->mat_graph; 8399 Mat coarse_mat, coarse_mat_is; 8400 Mat coarsedivudotp = NULL; 8401 Mat coarseG, t_coarse_mat_is; 8402 MatNullSpace CoarseNullSpace = NULL; 8403 ISLocalToGlobalMapping coarse_islg; 8404 IS coarse_is, *isarray, corners; 8405 PetscInt i, im_active = -1, active_procs = -1; 8406 PetscInt nis, nisdofs, nisneu, nisvert; 8407 PetscInt coarse_eqs_per_proc, coarsening_ratio; 8408 PC pc_temp; 8409 PCType coarse_pc_type; 8410 KSPType coarse_ksp_type; 8411 PetscBool multilevel_requested, multilevel_allowed; 8412 PetscBool coarse_reuse, multi_element = graph->multi_element; 8413 PetscInt ncoarse, nedcfield; 8414 PetscBool compute_vecs = PETSC_FALSE; 8415 PetscScalar *array; 8416 MatReuse coarse_mat_reuse; 8417 PetscBool restr, full_restr, have_void; 8418 PetscMPIInt size; 8419 8420 PetscFunctionBegin; 8421 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8422 /* Assign global numbering to coarse dofs */ 8423 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8424 PetscInt ocoarse_size; 8425 compute_vecs = PETSC_TRUE; 8426 8427 pcbddc->new_primal_space = PETSC_TRUE; 8428 ocoarse_size = pcbddc->coarse_size; 8429 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8430 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 8431 /* see if we can avoid some work */ 8432 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8433 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8434 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8435 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8436 coarse_reuse = PETSC_FALSE; 8437 } else { /* we can safely reuse already computed coarse matrix */ 8438 coarse_reuse = PETSC_TRUE; 8439 } 8440 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8441 coarse_reuse = PETSC_FALSE; 8442 } 8443 /* reset any subassembling information */ 8444 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8445 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8446 coarse_reuse = PETSC_TRUE; 8447 } 8448 if (coarse_reuse && pcbddc->coarse_ksp) { 8449 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 8450 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8451 coarse_mat_reuse = MAT_REUSE_MATRIX; 8452 } else { 8453 coarse_mat = NULL; 8454 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8455 } 8456 8457 /* creates temporary l2gmap and IS for coarse indexes */ 8458 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 8459 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 8460 8461 /* creates temporary MATIS object for coarse matrix */ 8462 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is)); 8463 PetscCall(MatSetType(t_coarse_mat_is, MATIS)); 8464 PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size)); 8465 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_TRUE)); 8466 PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg)); 8467 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat)); 8468 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8469 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8470 PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view")); 8471 8472 /* count "active" (i.e. with positive local size) and "void" processes */ 8473 im_active = !!pcis->n; 8474 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8475 8476 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8477 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8478 /* full_restr : just use the receivers from the subassembling pattern */ 8479 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 8480 coarse_mat_is = NULL; 8481 multilevel_allowed = PETSC_FALSE; 8482 multilevel_requested = PETSC_FALSE; 8483 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 8484 if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1); 8485 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8486 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8487 coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio; 8488 if (multilevel_requested) { 8489 ncoarse = active_procs / coarsening_ratio; 8490 restr = PETSC_FALSE; 8491 full_restr = PETSC_FALSE; 8492 } else { 8493 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 8494 restr = PETSC_TRUE; 8495 full_restr = PETSC_TRUE; 8496 } 8497 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8498 ncoarse = PetscMax(1, ncoarse); 8499 if (!pcbddc->coarse_subassembling) { 8500 if (coarsening_ratio > 1) { 8501 if (multilevel_requested) { 8502 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8503 } else { 8504 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8505 } 8506 } else { 8507 PetscMPIInt rank; 8508 8509 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 8510 have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE; 8511 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 8512 } 8513 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8514 PetscInt psum; 8515 if (pcbddc->coarse_ksp) psum = 1; 8516 else psum = 0; 8517 PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8518 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8519 } 8520 /* determine if we can go multilevel */ 8521 if (multilevel_requested) { 8522 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8523 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8524 } 8525 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8526 8527 /* dump subassembling pattern */ 8528 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 8529 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8530 nedcfield = -1; 8531 corners = NULL; 8532 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8533 PetscInt *tidxs, *tidxs2, nout, tsize, i; 8534 const PetscInt *idxs; 8535 ISLocalToGlobalMapping tmap; 8536 8537 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8538 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 8539 /* allocate space for temporary storage */ 8540 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 8541 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 8542 /* allocate for IS array */ 8543 nisdofs = pcbddc->n_ISForDofsLocal; 8544 if (pcbddc->nedclocal) { 8545 if (pcbddc->nedfield > -1) { 8546 nedcfield = pcbddc->nedfield; 8547 } else { 8548 nedcfield = 0; 8549 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 8550 nisdofs = 1; 8551 } 8552 } 8553 nisneu = !!pcbddc->NeumannBoundariesLocal; 8554 nisvert = 0; /* nisvert is not used */ 8555 nis = nisdofs + nisneu + nisvert; 8556 PetscCall(PetscMalloc1(nis, &isarray)); 8557 /* dofs splitting */ 8558 for (i = 0; i < nisdofs; i++) { 8559 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8560 if (nedcfield != i) { 8561 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 8562 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8563 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8564 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8565 } else { 8566 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 8567 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 8568 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8569 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8570 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 8571 } 8572 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8573 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 8574 /* PetscCall(ISView(isarray[i],0)); */ 8575 } 8576 /* neumann boundaries */ 8577 if (pcbddc->NeumannBoundariesLocal) { 8578 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8579 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 8580 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8581 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8582 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8583 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8584 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 8585 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8586 } 8587 /* coordinates */ 8588 if (pcbddc->corner_selected) { 8589 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8590 PetscCall(ISGetLocalSize(corners, &tsize)); 8591 PetscCall(ISGetIndices(corners, &idxs)); 8592 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8593 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8594 PetscCall(ISRestoreIndices(corners, &idxs)); 8595 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8596 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8597 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 8598 } 8599 PetscCall(PetscFree(tidxs)); 8600 PetscCall(PetscFree(tidxs2)); 8601 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8602 } else { 8603 nis = 0; 8604 nisdofs = 0; 8605 nisneu = 0; 8606 nisvert = 0; 8607 isarray = NULL; 8608 } 8609 /* destroy no longer needed map */ 8610 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8611 8612 /* subassemble */ 8613 if (multilevel_allowed) { 8614 Vec vp[1]; 8615 PetscInt nvecs = 0; 8616 PetscBool reuse; 8617 8618 vp[0] = NULL; 8619 /* XXX HDIV also */ 8620 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8621 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 8622 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 8623 PetscCall(VecSetType(vp[0], VECSTANDARD)); 8624 nvecs = 1; 8625 8626 if (pcbddc->divudotp) { 8627 Mat B, loc_divudotp; 8628 Vec v, p; 8629 IS dummy; 8630 PetscInt np; 8631 8632 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8633 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8634 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8635 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8636 PetscCall(MatCreateVecs(B, &v, &p)); 8637 PetscCall(VecSet(p, 1.)); 8638 PetscCall(MatMultTranspose(B, p, v)); 8639 PetscCall(VecDestroy(&p)); 8640 PetscCall(MatDestroy(&B)); 8641 PetscCall(VecGetArray(vp[0], &array)); 8642 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8643 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8644 PetscCall(VecResetArray(pcbddc->vec1_P)); 8645 PetscCall(VecRestoreArray(vp[0], &array)); 8646 PetscCall(ISDestroy(&dummy)); 8647 PetscCall(VecDestroy(&v)); 8648 } 8649 } 8650 if (coarse_mat) reuse = PETSC_TRUE; 8651 else reuse = PETSC_FALSE; 8652 if (multi_element) { 8653 /* XXX divudotp */ 8654 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_FALSE)); 8655 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8656 coarse_mat_is = t_coarse_mat_is; 8657 } else { 8658 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8659 if (reuse) { 8660 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8661 } else { 8662 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8663 } 8664 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8665 PetscScalar *arraym; 8666 const PetscScalar *arrayv; 8667 PetscInt nl; 8668 PetscCall(VecGetLocalSize(vp[0], &nl)); 8669 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8670 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8671 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8672 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8673 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8674 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8675 PetscCall(VecDestroy(&vp[0])); 8676 } else { 8677 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8678 } 8679 } 8680 } else { 8681 if (ncoarse != size) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8682 else { 8683 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8684 coarse_mat_is = t_coarse_mat_is; 8685 } 8686 } 8687 if (coarse_mat_is || coarse_mat) { 8688 if (!multilevel_allowed) { 8689 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8690 } else { 8691 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8692 if (coarse_mat_is) { 8693 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8694 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8695 coarse_mat = coarse_mat_is; 8696 } 8697 } 8698 } 8699 PetscCall(MatDestroy(&t_coarse_mat_is)); 8700 PetscCall(MatDestroy(&coarse_mat_is)); 8701 8702 /* create local to global scatters for coarse problem */ 8703 if (compute_vecs) { 8704 PetscInt lrows; 8705 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8706 if (coarse_mat) { 8707 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8708 } else { 8709 lrows = 0; 8710 } 8711 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8712 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8713 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8714 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8715 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8716 } 8717 PetscCall(ISDestroy(&coarse_is)); 8718 8719 /* set defaults for coarse KSP and PC */ 8720 if (multilevel_allowed) { 8721 coarse_ksp_type = KSPRICHARDSON; 8722 coarse_pc_type = PCBDDC; 8723 } else { 8724 coarse_ksp_type = KSPPREONLY; 8725 coarse_pc_type = PCREDUNDANT; 8726 } 8727 8728 /* print some info if requested */ 8729 if (pcbddc->dbg_flag) { 8730 if (!multilevel_allowed) { 8731 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8732 if (multilevel_requested) { 8733 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio)); 8734 } else if (pcbddc->max_levels) { 8735 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8736 } 8737 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8738 } 8739 } 8740 8741 /* communicate coarse discrete gradient */ 8742 coarseG = NULL; 8743 if (pcbddc->nedcG && multilevel_allowed) { 8744 MPI_Comm ccomm; 8745 if (coarse_mat) { 8746 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8747 } else { 8748 ccomm = MPI_COMM_NULL; 8749 } 8750 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8751 } 8752 8753 /* create the coarse KSP object only once with defaults */ 8754 if (coarse_mat) { 8755 PetscBool isredundant, isbddc, force, valid; 8756 PetscViewer dbg_viewer = NULL; 8757 PetscBool isset, issym, isher, isspd; 8758 8759 if (pcbddc->dbg_flag) { 8760 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8761 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8762 } 8763 if (!pcbddc->coarse_ksp) { 8764 char prefix[256], str_level[16]; 8765 size_t len; 8766 8767 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8768 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8769 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8770 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8771 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1)); 8772 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8773 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8774 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8775 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8776 /* TODO is this logic correct? should check for coarse_mat type */ 8777 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8778 /* prefix */ 8779 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8780 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8781 if (!pcbddc->current_level) { 8782 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8783 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8784 } else { 8785 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8786 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8787 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8788 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8789 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8790 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 8791 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8792 } 8793 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8794 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8795 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8796 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8797 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8798 /* allow user customization */ 8799 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8800 /* get some info after set from options */ 8801 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8802 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8803 force = PETSC_FALSE; 8804 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8805 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8806 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8807 if (multilevel_allowed && !force && !valid) { 8808 isbddc = PETSC_TRUE; 8809 PetscCall(PCSetType(pc_temp, PCBDDC)); 8810 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8811 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8812 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8813 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8814 PetscObjectOptionsBegin((PetscObject)pc_temp); 8815 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8816 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8817 PetscOptionsEnd(); 8818 pc_temp->setfromoptionscalled++; 8819 } 8820 } 8821 } 8822 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8823 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8824 if (nisdofs) { 8825 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8826 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8827 } 8828 if (nisneu) { 8829 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8830 PetscCall(ISDestroy(&isarray[nisdofs])); 8831 } 8832 if (nisvert) { 8833 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8834 PetscCall(ISDestroy(&isarray[nis - 1])); 8835 } 8836 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8837 8838 /* get some info after set from options */ 8839 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8840 8841 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8842 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8843 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8844 force = PETSC_FALSE; 8845 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8846 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8847 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8848 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8849 if (isredundant) { 8850 KSP inner_ksp; 8851 PC inner_pc; 8852 8853 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8854 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8855 } 8856 8857 /* parameters which miss an API */ 8858 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8859 if (isbddc) { 8860 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8861 8862 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8863 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8864 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8865 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8866 if (pcbddc_coarse->benign_saddle_point) { 8867 Mat coarsedivudotp_is; 8868 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8869 IS row, col; 8870 const PetscInt *gidxs; 8871 PetscInt n, st, M, N; 8872 8873 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8874 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8875 st = st - n; 8876 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8877 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8878 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8879 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8880 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8881 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8882 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8883 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8884 PetscCall(ISGetSize(row, &M)); 8885 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8886 PetscCall(ISDestroy(&row)); 8887 PetscCall(ISDestroy(&col)); 8888 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8889 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8890 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8891 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8892 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8893 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8894 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8895 PetscCall(MatDestroy(&coarsedivudotp)); 8896 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8897 PetscCall(MatDestroy(&coarsedivudotp_is)); 8898 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8899 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8900 } 8901 } 8902 8903 /* propagate symmetry info of coarse matrix */ 8904 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8905 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8906 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8907 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8908 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8909 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8910 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8911 8912 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 8913 /* set operators */ 8914 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8915 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8916 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8917 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8918 } 8919 PetscCall(MatDestroy(&coarseG)); 8920 PetscCall(PetscFree(isarray)); 8921 #if 0 8922 { 8923 PetscViewer viewer; 8924 char filename[256]; 8925 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 8926 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8927 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8928 PetscCall(MatView(coarse_mat,viewer)); 8929 PetscCall(PetscViewerPopFormat(viewer)); 8930 PetscCall(PetscViewerDestroy(&viewer)); 8931 } 8932 #endif 8933 8934 if (corners) { 8935 Vec gv; 8936 IS is; 8937 const PetscInt *idxs; 8938 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8939 PetscScalar *coords; 8940 8941 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8942 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8943 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8944 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8945 PetscCall(VecSetBlockSize(gv, cdim)); 8946 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8947 PetscCall(VecSetType(gv, VECSTANDARD)); 8948 PetscCall(VecSetFromOptions(gv)); 8949 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8950 8951 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8952 PetscCall(ISGetLocalSize(is, &n)); 8953 PetscCall(ISGetIndices(is, &idxs)); 8954 PetscCall(PetscMalloc1(n * cdim, &coords)); 8955 for (i = 0; i < n; i++) { 8956 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 8957 } 8958 PetscCall(ISRestoreIndices(is, &idxs)); 8959 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8960 8961 PetscCall(ISGetLocalSize(corners, &n)); 8962 PetscCall(ISGetIndices(corners, &idxs)); 8963 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 8964 PetscCall(ISRestoreIndices(corners, &idxs)); 8965 PetscCall(PetscFree(coords)); 8966 PetscCall(VecAssemblyBegin(gv)); 8967 PetscCall(VecAssemblyEnd(gv)); 8968 PetscCall(VecGetArray(gv, &coords)); 8969 if (pcbddc->coarse_ksp) { 8970 PC coarse_pc; 8971 PetscBool isbddc; 8972 8973 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 8974 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 8975 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8976 PetscReal *realcoords; 8977 8978 PetscCall(VecGetLocalSize(gv, &n)); 8979 #if defined(PETSC_USE_COMPLEX) 8980 PetscCall(PetscMalloc1(n, &realcoords)); 8981 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 8982 #else 8983 realcoords = coords; 8984 #endif 8985 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 8986 #if defined(PETSC_USE_COMPLEX) 8987 PetscCall(PetscFree(realcoords)); 8988 #endif 8989 } 8990 } 8991 PetscCall(VecRestoreArray(gv, &coords)); 8992 PetscCall(VecDestroy(&gv)); 8993 } 8994 PetscCall(ISDestroy(&corners)); 8995 8996 if (pcbddc->coarse_ksp) { 8997 Vec crhs, csol; 8998 8999 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 9000 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 9001 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL)); 9002 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs)); 9003 } 9004 PetscCall(MatDestroy(&coarsedivudotp)); 9005 9006 /* compute null space for coarse solver if the benign trick has been requested */ 9007 if (pcbddc->benign_null) { 9008 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 9009 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 9010 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 9011 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 9012 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9013 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9014 if (coarse_mat) { 9015 Vec nullv; 9016 PetscScalar *array, *array2; 9017 PetscInt nl; 9018 9019 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 9020 PetscCall(VecGetLocalSize(nullv, &nl)); 9021 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9022 PetscCall(VecGetArray(nullv, &array2)); 9023 PetscCall(PetscArraycpy(array2, array, nl)); 9024 PetscCall(VecRestoreArray(nullv, &array2)); 9025 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9026 PetscCall(VecNormalize(nullv, NULL)); 9027 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 9028 PetscCall(VecDestroy(&nullv)); 9029 } 9030 } 9031 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 9032 9033 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9034 if (pcbddc->coarse_ksp) { 9035 PetscBool ispreonly; 9036 9037 if (CoarseNullSpace) { 9038 PetscBool isnull; 9039 9040 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 9041 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 9042 /* TODO: add local nullspaces (if any) */ 9043 } 9044 /* setup coarse ksp */ 9045 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 9046 /* Check coarse problem if in debug mode or if solving with an iterative method */ 9047 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 9048 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 9049 KSP check_ksp; 9050 KSPType check_ksp_type; 9051 PC check_pc; 9052 Vec check_vec, coarse_vec; 9053 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 9054 PetscInt its; 9055 PetscBool compute_eigs; 9056 PetscReal *eigs_r, *eigs_c; 9057 PetscInt neigs; 9058 const char *prefix; 9059 9060 /* Create ksp object suitable for estimation of extreme eigenvalues */ 9061 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 9062 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 9063 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 9064 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 9065 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 9066 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size)); 9067 /* prevent from setup unneeded object */ 9068 PetscCall(KSPGetPC(check_ksp, &check_pc)); 9069 PetscCall(PCSetType(check_pc, PCNONE)); 9070 if (ispreonly) { 9071 check_ksp_type = KSPPREONLY; 9072 compute_eigs = PETSC_FALSE; 9073 } else { 9074 check_ksp_type = KSPGMRES; 9075 compute_eigs = PETSC_TRUE; 9076 } 9077 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 9078 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 9079 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 9080 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 9081 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 9082 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 9083 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 9084 PetscCall(KSPSetFromOptions(check_ksp)); 9085 PetscCall(KSPSetUp(check_ksp)); 9086 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 9087 PetscCall(KSPSetPC(check_ksp, check_pc)); 9088 /* create random vec */ 9089 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 9090 PetscCall(VecSetRandom(check_vec, NULL)); 9091 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9092 /* solve coarse problem */ 9093 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 9094 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 9095 /* set eigenvalue estimation if preonly has not been requested */ 9096 if (compute_eigs) { 9097 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 9098 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 9099 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 9100 if (neigs) { 9101 lambda_max = eigs_r[neigs - 1]; 9102 lambda_min = eigs_r[0]; 9103 if (pcbddc->use_coarse_estimates) { 9104 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 9105 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 9106 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 9107 } 9108 } 9109 } 9110 } 9111 9112 /* check coarse problem residual error */ 9113 if (pcbddc->dbg_flag) { 9114 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 9115 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9116 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 9117 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 9118 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9119 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 9120 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 9121 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer)); 9122 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer)); 9123 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 9124 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 9125 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 9126 if (compute_eigs) { 9127 PetscReal lambda_max_s, lambda_min_s; 9128 KSPConvergedReason reason; 9129 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 9130 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 9131 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 9132 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 9133 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 9134 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 9135 } 9136 PetscCall(PetscViewerFlush(dbg_viewer)); 9137 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9138 } 9139 PetscCall(VecDestroy(&check_vec)); 9140 PetscCall(VecDestroy(&coarse_vec)); 9141 PetscCall(KSPDestroy(&check_ksp)); 9142 if (compute_eigs) { 9143 PetscCall(PetscFree(eigs_r)); 9144 PetscCall(PetscFree(eigs_c)); 9145 } 9146 } 9147 } 9148 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 9149 /* print additional info */ 9150 if (pcbddc->dbg_flag) { 9151 /* waits until all processes reaches this point */ 9152 PetscCall(PetscBarrier((PetscObject)pc)); 9153 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 9154 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9155 } 9156 9157 /* free memory */ 9158 PetscCall(MatDestroy(&coarse_mat)); 9159 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9160 PetscFunctionReturn(PETSC_SUCCESS); 9161 } 9162 9163 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 9164 { 9165 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9166 PC_IS *pcis = (PC_IS *)pc->data; 9167 IS subset, subset_mult, subset_n; 9168 PetscInt local_size, coarse_size = 0; 9169 PetscInt *local_primal_indices = NULL; 9170 const PetscInt *t_local_primal_indices; 9171 9172 PetscFunctionBegin; 9173 /* Compute global number of coarse dofs */ 9174 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 9175 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 9176 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 9177 PetscCall(ISDestroy(&subset_n)); 9178 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 9179 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 9180 PetscCall(ISDestroy(&subset)); 9181 PetscCall(ISDestroy(&subset_mult)); 9182 PetscCall(ISGetLocalSize(subset_n, &local_size)); 9183 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 9184 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 9185 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 9186 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 9187 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 9188 PetscCall(ISDestroy(&subset_n)); 9189 9190 if (pcbddc->dbg_flag) { 9191 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9192 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 9193 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size)); 9194 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9195 } 9196 9197 /* get back data */ 9198 *coarse_size_n = coarse_size; 9199 *local_primal_indices_n = local_primal_indices; 9200 PetscFunctionReturn(PETSC_SUCCESS); 9201 } 9202 9203 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 9204 { 9205 IS localis_t; 9206 PetscInt i, lsize, *idxs, n; 9207 PetscScalar *vals; 9208 9209 PetscFunctionBegin; 9210 /* get indices in local ordering exploiting local to global map */ 9211 PetscCall(ISGetLocalSize(globalis, &lsize)); 9212 PetscCall(PetscMalloc1(lsize, &vals)); 9213 for (i = 0; i < lsize; i++) vals[i] = 1.0; 9214 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 9215 PetscCall(VecSet(gwork, 0.0)); 9216 PetscCall(VecSet(lwork, 0.0)); 9217 if (idxs) { /* multilevel guard */ 9218 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 9219 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 9220 } 9221 PetscCall(VecAssemblyBegin(gwork)); 9222 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 9223 PetscCall(PetscFree(vals)); 9224 PetscCall(VecAssemblyEnd(gwork)); 9225 /* now compute set in local ordering */ 9226 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9227 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9228 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 9229 PetscCall(VecGetSize(lwork, &n)); 9230 for (i = 0, lsize = 0; i < n; i++) { 9231 if (PetscRealPart(vals[i]) > 0.5) lsize++; 9232 } 9233 PetscCall(PetscMalloc1(lsize, &idxs)); 9234 for (i = 0, lsize = 0; i < n; i++) { 9235 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 9236 } 9237 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 9238 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 9239 *localis = localis_t; 9240 PetscFunctionReturn(PETSC_SUCCESS); 9241 } 9242 9243 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 9244 { 9245 PC_IS *pcis = (PC_IS *)pc->data; 9246 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9247 PC_IS *pcisf; 9248 PC_BDDC *pcbddcf; 9249 PC pcf; 9250 9251 PetscFunctionBegin; 9252 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 9253 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 9254 PetscCall(PCSetType(pcf, PCBDDC)); 9255 9256 pcisf = (PC_IS *)pcf->data; 9257 pcbddcf = (PC_BDDC *)pcf->data; 9258 9259 pcisf->is_B_local = pcis->is_B_local; 9260 pcisf->vec1_N = pcis->vec1_N; 9261 pcisf->BtoNmap = pcis->BtoNmap; 9262 pcisf->n = pcis->n; 9263 pcisf->n_B = pcis->n_B; 9264 9265 PetscCall(PetscFree(pcbddcf->mat_graph)); 9266 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9267 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 9268 pcbddcf->sub_schurs = schurs; 9269 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 9270 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9271 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9272 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9273 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9274 pcbddcf->use_faces = PETSC_TRUE; 9275 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9276 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9277 pcbddcf->use_qr_single = (PetscBool)!constraints; 9278 pcbddcf->fake_change = PETSC_TRUE; 9279 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9280 9281 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9282 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9283 9284 *change = pcbddcf->ConstraintMatrix; 9285 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 9286 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 9287 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9288 9289 if (schurs) pcbddcf->sub_schurs = NULL; 9290 pcbddcf->ConstraintMatrix = NULL; 9291 pcbddcf->mat_graph = NULL; 9292 pcisf->is_B_local = NULL; 9293 pcisf->vec1_N = NULL; 9294 pcisf->BtoNmap = NULL; 9295 PetscCall(PCDestroy(&pcf)); 9296 PetscFunctionReturn(PETSC_SUCCESS); 9297 } 9298 9299 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9300 { 9301 PC_IS *pcis = (PC_IS *)pc->data; 9302 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9303 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 9304 Mat S_j; 9305 PetscInt *used_xadj, *used_adjncy; 9306 PetscBool free_used_adj; 9307 9308 PetscFunctionBegin; 9309 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9310 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9311 free_used_adj = PETSC_FALSE; 9312 if (pcbddc->sub_schurs_layers == -1) { 9313 used_xadj = NULL; 9314 used_adjncy = NULL; 9315 } else { 9316 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9317 used_xadj = pcbddc->mat_graph->xadj; 9318 used_adjncy = pcbddc->mat_graph->adjncy; 9319 } else if (pcbddc->computed_rowadj) { 9320 used_xadj = pcbddc->mat_graph->xadj; 9321 used_adjncy = pcbddc->mat_graph->adjncy; 9322 } else { 9323 PetscBool flg_row = PETSC_FALSE; 9324 const PetscInt *xadj, *adjncy; 9325 PetscInt nvtxs; 9326 9327 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9328 if (flg_row) { 9329 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 9330 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 9331 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 9332 free_used_adj = PETSC_TRUE; 9333 } else { 9334 pcbddc->sub_schurs_layers = -1; 9335 used_xadj = NULL; 9336 used_adjncy = NULL; 9337 } 9338 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9339 } 9340 } 9341 9342 /* setup sub_schurs data */ 9343 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 9344 if (!sub_schurs->schur_explicit) { 9345 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9346 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 9347 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 9348 } else { 9349 Mat change = NULL; 9350 Vec scaling = NULL; 9351 IS change_primal = NULL, iP; 9352 PetscInt benign_n; 9353 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9354 PetscBool need_change = PETSC_FALSE; 9355 PetscBool discrete_harmonic = PETSC_FALSE; 9356 9357 if (!pcbddc->use_vertices && reuse_solvers) { 9358 PetscInt n_vertices; 9359 9360 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 9361 reuse_solvers = (PetscBool)!n_vertices; 9362 } 9363 if (!pcbddc->benign_change_explicit) { 9364 benign_n = pcbddc->benign_n; 9365 } else { 9366 benign_n = 0; 9367 } 9368 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9369 We need a global reduction to avoid possible deadlocks. 9370 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9371 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9372 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9373 PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 9374 need_change = (PetscBool)(!need_change); 9375 } 9376 /* If the user defines additional constraints, we import them here */ 9377 if (need_change) { 9378 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 9379 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 9380 } 9381 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9382 9383 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 9384 if (iP) { 9385 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 9386 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 9387 PetscOptionsEnd(); 9388 } 9389 if (discrete_harmonic) { 9390 Mat A; 9391 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 9392 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 9393 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 9394 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 9395 pcbddc->benign_zerodiag_subs, change, change_primal)); 9396 PetscCall(MatDestroy(&A)); 9397 } else { 9398 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 9399 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 9400 } 9401 PetscCall(MatDestroy(&change)); 9402 PetscCall(ISDestroy(&change_primal)); 9403 } 9404 PetscCall(MatDestroy(&S_j)); 9405 9406 /* free adjacency */ 9407 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 9408 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9409 PetscFunctionReturn(PETSC_SUCCESS); 9410 } 9411 9412 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9413 { 9414 PC_IS *pcis = (PC_IS *)pc->data; 9415 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9416 PCBDDCGraph graph; 9417 9418 PetscFunctionBegin; 9419 /* attach interface graph for determining subsets */ 9420 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9421 IS verticesIS, verticescomm; 9422 PetscInt vsize, *idxs; 9423 9424 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9425 PetscCall(ISGetSize(verticesIS, &vsize)); 9426 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 9427 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 9428 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 9429 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9430 PetscCall(PCBDDCGraphCreate(&graph)); 9431 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 9432 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 9433 PetscCall(ISDestroy(&verticescomm)); 9434 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9435 } else { 9436 graph = pcbddc->mat_graph; 9437 } 9438 /* print some info */ 9439 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9440 IS vertices; 9441 PetscInt nv, nedges, nfaces; 9442 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 9443 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9444 PetscCall(ISGetSize(vertices, &nv)); 9445 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9446 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 9447 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 9448 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 9449 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 9450 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9451 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9452 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9453 } 9454 9455 /* sub_schurs init */ 9456 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9457 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 9458 9459 /* free graph struct */ 9460 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 9461 PetscFunctionReturn(PETSC_SUCCESS); 9462 } 9463 9464 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer) 9465 { 9466 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 9467 PetscInt n = pc->pmat->rmap->n, ln, ni, st; 9468 const PetscInt *idxs; 9469 IS gis; 9470 9471 PetscFunctionBegin; 9472 if (!is) PetscFunctionReturn(PETSC_SUCCESS); 9473 PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL)); 9474 PetscCall(MatGetLocalSize(matis->A, NULL, &ln)); 9475 PetscCall(PetscArrayzero(matis->sf_leafdata, ln)); 9476 PetscCall(PetscArrayzero(matis->sf_rootdata, n)); 9477 PetscCall(ISGetLocalSize(is, &ni)); 9478 PetscCall(ISGetIndices(is, &idxs)); 9479 for (PetscInt i = 0; i < ni; i++) { 9480 if (idxs[i] < 0 || idxs[i] >= ln) continue; 9481 matis->sf_leafdata[idxs[i]] = 1; 9482 } 9483 PetscCall(ISRestoreIndices(is, &idxs)); 9484 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9485 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9486 ln = 0; 9487 for (PetscInt i = 0; i < n; i++) { 9488 if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st; 9489 } 9490 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis)); 9491 PetscCall(ISView(gis, viewer)); 9492 PetscCall(ISDestroy(&gis)); 9493 PetscFunctionReturn(PETSC_SUCCESS); 9494 } 9495 9496 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile) 9497 { 9498 PetscInt header[11]; 9499 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9500 PetscViewer viewer; 9501 MPI_Comm comm = PetscObjectComm((PetscObject)pc); 9502 9503 PetscFunctionBegin; 9504 PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer)); 9505 if (load) { 9506 IS is; 9507 Mat A; 9508 9509 PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT)); 9510 PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9511 PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9512 PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9513 PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9514 PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9515 PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9516 PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9517 PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9518 PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9519 PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9520 if (header[0]) { 9521 PetscCall(ISCreate(comm, &is)); 9522 PetscCall(ISLoad(is, viewer)); 9523 PetscCall(PCBDDCSetDirichletBoundaries(pc, is)); 9524 PetscCall(ISDestroy(&is)); 9525 } 9526 if (header[1]) { 9527 PetscCall(ISCreate(comm, &is)); 9528 PetscCall(ISLoad(is, viewer)); 9529 PetscCall(PCBDDCSetNeumannBoundaries(pc, is)); 9530 PetscCall(ISDestroy(&is)); 9531 } 9532 if (header[2]) { 9533 IS *isarray; 9534 9535 PetscCall(PetscMalloc1(header[2], &isarray)); 9536 for (PetscInt i = 0; i < header[2]; i++) { 9537 PetscCall(ISCreate(comm, &isarray[i])); 9538 PetscCall(ISLoad(isarray[i], viewer)); 9539 } 9540 PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray)); 9541 for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i])); 9542 PetscCall(PetscFree(isarray)); 9543 } 9544 if (header[3]) { 9545 PetscCall(ISCreate(comm, &is)); 9546 PetscCall(ISLoad(is, viewer)); 9547 PetscCall(PCBDDCSetPrimalVerticesIS(pc, is)); 9548 PetscCall(ISDestroy(&is)); 9549 } 9550 if (header[4]) { 9551 PetscCall(MatCreate(comm, &A)); 9552 PetscCall(MatSetType(A, MATAIJ)); 9553 PetscCall(MatLoad(A, viewer)); 9554 PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8])); 9555 PetscCall(MatDestroy(&A)); 9556 } 9557 if (header[9]) { 9558 PetscCall(MatCreate(comm, &A)); 9559 PetscCall(MatSetType(A, MATIS)); 9560 PetscCall(MatLoad(A, viewer)); 9561 PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL)); 9562 PetscCall(MatDestroy(&A)); 9563 } 9564 } else { 9565 header[0] = (PetscInt)!!pcbddc->DirichletBoundariesLocal; 9566 header[1] = (PetscInt)!!pcbddc->NeumannBoundariesLocal; 9567 header[2] = pcbddc->n_ISForDofsLocal; 9568 header[3] = (PetscInt)!!pcbddc->user_primal_vertices_local; 9569 header[4] = (PetscInt)!!pcbddc->discretegradient; 9570 header[5] = pcbddc->nedorder; 9571 header[6] = pcbddc->nedfield; 9572 header[7] = (PetscInt)pcbddc->nedglobal; 9573 header[8] = (PetscInt)pcbddc->conforming; 9574 header[9] = (PetscInt)!!pcbddc->divudotp; 9575 header[10] = (PetscInt)pcbddc->divudotp_trans; 9576 if (header[4]) header[3] = 0; 9577 9578 PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT)); 9579 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer)); 9580 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer)); 9581 for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer)); 9582 if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer)); 9583 if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer)); 9584 if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer)); 9585 } 9586 PetscCall(PetscViewerDestroy(&viewer)); 9587 PetscFunctionReturn(PETSC_SUCCESS); 9588 } 9589 9590 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9591 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9592 { 9593 Mat At; 9594 IS rows; 9595 PetscInt rst, ren; 9596 PetscLayout rmap; 9597 9598 PetscFunctionBegin; 9599 rst = ren = 0; 9600 if (ccomm != MPI_COMM_NULL) { 9601 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 9602 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 9603 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 9604 PetscCall(PetscLayoutSetUp(rmap)); 9605 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 9606 } 9607 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 9608 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 9609 PetscCall(ISDestroy(&rows)); 9610 9611 if (ccomm != MPI_COMM_NULL) { 9612 Mat_MPIAIJ *a, *b; 9613 IS from, to; 9614 Vec gvec; 9615 PetscInt lsize; 9616 9617 PetscCall(MatCreate(ccomm, B)); 9618 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 9619 PetscCall(MatSetType(*B, MATAIJ)); 9620 PetscCall(PetscLayoutDestroy(&(*B)->rmap)); 9621 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9622 a = (Mat_MPIAIJ *)At->data; 9623 b = (Mat_MPIAIJ *)(*B)->data; 9624 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 9625 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 9626 PetscCall(PetscObjectReference((PetscObject)a->A)); 9627 PetscCall(PetscObjectReference((PetscObject)a->B)); 9628 b->A = a->A; 9629 b->B = a->B; 9630 9631 b->donotstash = a->donotstash; 9632 b->roworiented = a->roworiented; 9633 b->rowindices = NULL; 9634 b->rowvalues = NULL; 9635 b->getrowactive = PETSC_FALSE; 9636 9637 (*B)->rmap = rmap; 9638 (*B)->factortype = A->factortype; 9639 (*B)->assembled = PETSC_TRUE; 9640 (*B)->insertmode = NOT_SET_VALUES; 9641 (*B)->preallocated = PETSC_TRUE; 9642 9643 if (a->colmap) { 9644 #if defined(PETSC_USE_CTABLE) 9645 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9646 #else 9647 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9648 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9649 #endif 9650 } else b->colmap = NULL; 9651 if (a->garray) { 9652 PetscInt len; 9653 len = a->B->cmap->n; 9654 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9655 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9656 } else b->garray = NULL; 9657 9658 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9659 b->lvec = a->lvec; 9660 9661 /* cannot use VecScatterCopy */ 9662 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9663 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9664 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9665 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9666 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9667 PetscCall(ISDestroy(&from)); 9668 PetscCall(ISDestroy(&to)); 9669 PetscCall(VecDestroy(&gvec)); 9670 } 9671 PetscCall(MatDestroy(&At)); 9672 PetscFunctionReturn(PETSC_SUCCESS); 9673 } 9674 9675 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */ 9676 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA) 9677 { 9678 PetscBool isaij; 9679 MPI_Comm comm; 9680 9681 PetscFunctionBegin; 9682 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9683 PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, "")); 9684 PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented"); 9685 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij)); 9686 if (isaij) { /* SeqAIJ supports repeated rows */ 9687 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA)); 9688 } else { 9689 Mat A_loc; 9690 Mat_SeqAIJ *da; 9691 PetscSF sf; 9692 PetscInt ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata; 9693 PetscScalar *daa; 9694 const PetscInt *idxs; 9695 const PetscSFNode *iremotes; 9696 PetscSFNode *remotes; 9697 9698 /* SF for incoming rows */ 9699 PetscCall(PetscSFCreate(comm, &sf)); 9700 PetscCall(ISGetLocalSize(rows, &ni)); 9701 PetscCall(ISGetIndices(rows, &idxs)); 9702 PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs)); 9703 PetscCall(ISRestoreIndices(rows, &idxs)); 9704 9705 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc)); 9706 da = (Mat_SeqAIJ *)A_loc->data; 9707 PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata)); 9708 for (PetscInt i = 0; i < m; i++) { 9709 rdata[2 * i + 0] = da->i[i + 1] - da->i[i]; 9710 rdata[2 * i + 1] = da->i[i]; 9711 } 9712 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9713 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9714 PetscCall(PetscMalloc1(ni + 1, &di)); 9715 di[0] = 0; 9716 for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0]; 9717 PetscCall(PetscMalloc1(di[ni], &dj)); 9718 PetscCall(PetscMalloc1(di[ni], &daa)); 9719 PetscCall(PetscMalloc1(di[ni], &remotes)); 9720 9721 PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes)); 9722 9723 /* SF graph for nonzeros */ 9724 c = 0; 9725 for (PetscInt i = 0; i < ni; i++) { 9726 const PetscInt rank = iremotes[i].rank; 9727 const PetscInt rsize = ldata[2 * i]; 9728 for (PetscInt j = 0; j < rsize; j++) { 9729 remotes[c].rank = rank; 9730 remotes[c].index = ldata[2 * i + 1] + j; 9731 c++; 9732 } 9733 } 9734 PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]); 9735 PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER)); 9736 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9737 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9738 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9739 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9740 9741 PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA)); 9742 PetscCall(MatDestroy(&A_loc)); 9743 PetscCall(PetscSFDestroy(&sf)); 9744 PetscCall(PetscFree(di)); 9745 PetscCall(PetscFree(dj)); 9746 PetscCall(PetscFree(daa)); 9747 PetscCall(PetscFree(remotes)); 9748 PetscCall(PetscFree2(ldata, rdata)); 9749 } 9750 PetscFunctionReturn(PETSC_SUCCESS); 9751 } 9752