Lines Matching refs:jac
20 PC_PCBJKOKKOS *jac = (PC_PCBJKOKKOS *)pc->data;
24 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &jac->ksp));
25 PetscCall(KSPSetNestLevel(jac->ksp, pc->kspnestlevel));
26 PetscCall(KSPSetErrorIfNotConverged(jac->ksp, pc->erroriffailure));
27 PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp, (PetscObject)pc, 1));
29 PetscCall(KSPSetOptionsPrefix(jac->ksp, prefix));
30 PetscCall(KSPAppendOptionsPrefix(jac->ksp, "pc_bjkokkos_"));
33 PetscCall(KSPSetDM(jac->ksp, dm));
34 PetscCall(KSPSetDMActive(jac->ksp, KSP_DMACTIVE_ALL, PETSC_FALSE));
36 jac->reason = PETSC_FALSE;
37 jac->monitor = PETSC_FALSE;
38 jac->batch_target = 0;
39 jac->rank_target = 0;
40 jac->nsolves_team = 1;
41 jac->ksp->max_it = 50; // this is really for GMRES w/o restarts
528 PC_PCBJKOKKOS *jac = (PC_PCBJKOKKOS *)pc->data;
537 PetscInt maxit = jac->ksp->max_it;
539 const PetscInt nwork = jac->nwork, nBlk = jac->nBlocks;
541 PetscReal rtol = jac->ksp->rtol, atol = jac->ksp->abstol, dtol = jac->ksp->divtol;
542 const PetscScalar *glb_idiag = jac->d_idiag_k->data(), *glb_bdata = NULL;
543 const PetscInt *glb_Aai, *glb_Aaj, *d_bid_eqOffset = jac->d_bid_eqOffset_k->data();
545 const PetscInt *d_isicol = jac->d_isicol_k->data(), *d_isrow = jac->d_isrow_k->data();
547 KSPIndex ksp_type_idx = jac->ksp_type_idx;
553 PetscBool monitor = jac->monitor; // captured
554 PetscInt view_bid = jac->batch_target;
558 jac->max_nits = 0;
560 if (jac->rank_target != rank) view_bid = -1; // turn off all but one process
602 d_bid_eqOffset = jac->d_bid_eqOffset_k->data();
605 if (jac->const_block_size) { // use shared memory for work vectors only if constant block size - TODO: test efficiency loss
610 stride_shared = jac->const_block_size; // captured
611 nShareVec = maximum_shared_mem_size / (jac->const_block_size * sizeof(PetscScalar)); // integer floor, number of vectors that fit in shared
614 global_buff_words = jac->n * nGlobBVec;
615 scr_bytes_team_shared = jac->const_block_size * nShareVec * sizeof(PetscScalar);
619 global_buff_words = jac->n * nwork;
622 stride_global = jac->n; // captured
628 PetscCall(PetscInfo(pc, "\tn = %d. %d shared bytes/team, %d global mem bytes, rtol=%e, num blocks %d, team_size=%d, %d vector threads, %d shared vectors, %d global vectors\n", (int)jac->n, scr_bytes_team_shared, global_buff_words, rtol, (int)nBlk, (int)team_size, PCBJKOKKOS_VEC_SIZE, nShareVec, nGlobBVec));
665 if (jac->reason) { // -pc_bjkokkos_ksp_converged_reason
675 for (PetscInt dmIdx = 0, head = 0, s = 0; dmIdx < jac->num_dms; dmIdx += batch_sz) {
676 for (PetscInt f = 0, idx = head; f < jac->dm_Nf[dmIdx]; f++, idx++, s++) {
679 jac->max_nits += h_metadata[idx + bid * jac->dm_Nf[dmIdx]].its; // report total number of iterations with high verbose
680 if (h_metadata[idx + bid * jac->dm_Nf[dmIdx]].its > max_nnit) {
681 max_nnit = h_metadata[idx + bid * jac->dm_Nf[dmIdx]].its;
685 if (h_metadata[idx + bid * jac->dm_Nf[dmIdx]].its > max_nnit) {
686 jac->max_nits = max_nnit = h_metadata[idx + bid * jac->dm_Nf[dmIdx]].its;
693 for (int bid = 0; bid < batch_sz; bid++) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)A), "%3" PetscInt_FMT " ", h_metadata[idx + bid * jac->dm_Nf[dmIdx]].its));
699 head += batch_sz * jac->dm_Nf[dmIdx];
708 jac->max_nits = max_nnit = h_metadata[blkID].its;
728 PetscCheck(h_metadata[blkID].reason >= 0 || !jac->ksp->errorifnotconverged, PetscObjectComm((PetscObject)pc), PETSC_ERR_CONV_FAILED, "ERROR reason=%s, its=%" PetscInt_FMT ". species %" PetscInt_FMT ", batch %" PetscInt_FMT,
740 if (!errsum && !jac->max_nits) { // set max its to give back to top KSP
742 if (h_metadata[blkID].its > jac->max_nits) jac->max_nits = h_metadata[blkID].its;
768 PC_PCBJKOKKOS *jac = (PC_PCBJKOKKOS *)pc->data;
783 if (!jac->vec_diag) {
809 jac->d_isrow_k = new Kokkos::View<PetscInt *>(Kokkos::create_mirror(DefaultMemorySpace(), h_isrow_k));
810 jac->d_isicol_k = new Kokkos::View<PetscInt *>(Kokkos::create_mirror(DefaultMemorySpace(), h_isicol_k));
811 Kokkos::deep_copy(*jac->d_isrow_k, h_isrow_k);
812 Kokkos::deep_copy(*jac->d_isicol_k, h_isicol_k);
823 PetscCall(DMCreateGlobalVector(pack, &jac->vec_diag));
826 if (!jac->vec_diag) { // get 'nDMs' and sizes 'block_sizes' w/o DMComposite. TODO: User could provide ISs
872 PetscCall(MatCreateVecs(A, &jac->vec_diag, NULL));
877 jac->num_dms = nDMs;
878 PetscCall(VecGetLocalSize(jac->vec_diag, &n));
879 jac->n = n;
880 jac->d_idiag_k = new Kokkos::View<PetscScalar *, Kokkos::LayoutRight>("idiag", n);
883 PetscCall(KSPSetFromOptions(jac->ksp));
884 PetscCall(PetscObjectTypeCompareAny((PetscObject)jac->ksp, &flg, KSPBICG, ""));
886 jac->ksp_type_idx = BATCH_KSP_BICG_IDX;
887 jac->nwork = 7;
889 PetscCall(PetscObjectTypeCompareAny((PetscObject)jac->ksp, &flg, KSPTFQMR, ""));
891 jac->ksp_type_idx = BATCH_KSP_TFQMR_IDX;
892 jac->nwork = 10;
895 PetscCall(PetscObjectTypeCompareAny((PetscObject)jac->ksp, &flg, KSPGMRES, ""));
897 jac->ksp_type_idx = BATCH_KSP_GMRESKK_IDX;
898 jac->nwork = 0;
901 PetscCall(KSPGetType(jac->ksp, &ksptype));
906 PetscOptionsBegin(PetscObjectComm((PetscObject)jac->ksp), ((PetscObject)jac->ksp)->prefix, "Options for Kokkos batch solver", "none");
907 PetscCall(PetscOptionsBool("-ksp_converged_reason", "", "bjkokkos.kokkos.cxx.c", jac->reason, &jac->reason, NULL));
908 PetscCall(PetscOptionsBool("-ksp_monitor", "", "bjkokkos.kokkos.cxx.c", jac->monitor, &jac->monitor, NULL));
909 PetscCall(PetscOptionsInt("-ksp_batch_target", "", "bjkokkos.kokkos.cxx.c", jac->batch_target, &jac->batch_target, NULL));
910 PetscCall(PetscOptionsInt("-ksp_rank_target", "", "bjkokkos.kokkos.cxx.c", jac->rank_target, &jac->rank_target, NULL));
911 PetscCall(PetscOptionsInt("-ksp_batch_nsolves_team", "", "bjkokkos.kokkos.cxx.c", jac->nsolves_team, &jac->nsolves_team, NULL));
912 PetscCheck(jac->batch_target < jac->num_dms, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "-ksp_batch_target (%" PetscInt_FMT ") >= number of DMs (%" PetscInt_FMT ")", jac->batch_target, jac->num_dms);
914 // get blocks - jac->d_bid_eqOffset_k
919 PetscCall(PetscMalloc(sizeof(*jac->dm_Nf) * nDMs, &jac->dm_Nf));
920 PetscCall(PetscInfo(pc, "Have %" PetscInt_FMT " blocks, n=%" PetscInt_FMT " rtol=%g type = %s\n", nDMs, n, (double)jac->ksp->rtol, ((PetscObject)jac->ksp)->type_name));
922 jac->nBlocks = 0;
931 jac->nBlocks += Nf;
933 if (ii == 0) PetscCall(PetscInfo(pc, "%" PetscInt_FMT ") %" PetscInt_FMT " blocks (%" PetscInt_FMT " total)\n", ii, Nf, jac->nBlocks));
935 PetscCall(PetscInfo(pc, "%" PetscInt_FMT ") %" PetscInt_FMT " blocks (%" PetscInt_FMT " total)\n", ii, Nf, jac->nBlocks));
937 jac->dm_Nf[ii] = Nf;
940 Kokkos::View<PetscInt *, Kokkos::LayoutRight, Kokkos::HostSpace> h_block_offsets("block_offsets", jac->nBlocks + 1);
941 if (pack) PetscCall(DMCompositeGetAccessArray(pack, jac->vec_diag, nDMs, NULL, subX));
943 jac->const_block_size = -1;
948 nblk = nloc / jac->dm_Nf[ii];
949 PetscCheck(nloc % jac->dm_Nf[ii] == 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "nloc%%jac->dm_Nf[ii] (%" PetscInt_FMT ") != 0 DMs", nloc % jac->dm_Nf[ii]);
950 for (PetscInt jj = 0; jj < jac->dm_Nf[ii]; jj++, idx++) {
953 if (idx == 0) PetscCall(PetscInfo(pc, "Add first of %" PetscInt_FMT " blocks with %" PetscInt_FMT " equations\n", jac->nBlocks, nblk));
955 PetscCall(PetscInfo(pc, "\t%" PetscInt_FMT ") Add block with %" PetscInt_FMT " equations of %" PetscInt_FMT "\n", idx + 1, nblk, jac->nBlocks));
957 if (jac->const_block_size == -1) jac->const_block_size = nblk;
958 else if (jac->const_block_size > 0 && jac->const_block_size != nblk) jac->const_block_size = 0;
962 PetscCall(DMCompositeRestoreAccessArray(pack, jac->vec_diag, jac->nBlocks, NULL, subX));
966 jac->d_bid_eqOffset_k = new Kokkos::View<PetscInt *, Kokkos::LayoutRight>(Kokkos::create_mirror(Kokkos::DefaultExecutionSpace::memory_space(), h_block_offsets));
967 Kokkos::deep_copy(*jac->d_bid_eqOffset_k, h_block_offsets);
971 { // get jac->d_idiag_k (PC setup),
975 const PetscInt *d_bid_eqOffset = jac->d_bid_eqOffset_k->data(), *r = jac->d_isrow_k->data(), *ic = jac->d_isicol_k->data();
976 PetscScalar *d_idiag = jac->d_idiag_k->data(), *dummy;
981 "Diag", Kokkos::TeamPolicy<>(jac->nBlocks, team_size, PCBJKOKKOS_VEC_SIZE), KOKKOS_LAMBDA(const team_member team) {
1011 PC_PCBJKOKKOS *jac = (PC_PCBJKOKKOS *)pc->data;
1014 PetscCall(KSPDestroy(&jac->ksp));
1015 PetscCall(VecDestroy(&jac->vec_diag));
1016 if (jac->d_bid_eqOffset_k) delete jac->d_bid_eqOffset_k;
1017 if (jac->d_idiag_k) delete jac->d_idiag_k;
1018 if (jac->d_isrow_k) delete jac->d_isrow_k;
1019 if (jac->d_isicol_k) delete jac->d_isicol_k;
1020 jac->d_bid_eqOffset_k = NULL;
1021 jac->d_idiag_k = NULL;
1022 jac->d_isrow_k = NULL;
1023 jac->d_isicol_k = NULL;
1026 PetscCall(PetscFree(jac->dm_Nf));
1027 jac->dm_Nf = NULL;
1028 if (jac->rowOffsets) delete jac->rowOffsets;
1029 if (jac->colIndices) delete jac->colIndices;
1030 if (jac->batch_b) delete jac->batch_b;
1031 if (jac->batch_x) delete jac->batch_x;
1032 if (jac->batch_values) delete jac->batch_values;
1033 jac->rowOffsets = NULL;
1034 jac->colIndices = NULL;
1035 jac->batch_b = NULL;
1036 jac->batch_x = NULL;
1037 jac->batch_values = NULL;
1051 PC_PCBJKOKKOS *jac = (PC_PCBJKOKKOS *)pc->data;
1055 if (!jac->ksp) PetscCall(PCBJKOKKOSCreateKSP_BJKOKKOS(pc));
1059 PetscCall(PetscViewerASCIIPrintf(viewer, "\t\tnwork = %" PetscInt_FMT ", rel tol = %e, abs tol = %e, div tol = %e, max it =%" PetscInt_FMT ", type = %s\n", jac->nwork, jac->ksp->rtol, jac->ksp->abstol, jac->ksp->divtol, jac->ksp->max_it,
1060 ((PetscObject)jac->ksp)->type_name));
1075 PC_PCBJKOKKOS *jac = (PC_PCBJKOKKOS *)pc->data;
1079 PetscCall(KSPDestroy(&jac->ksp));
1080 jac->ksp = ksp;
1114 PC_PCBJKOKKOS *jac = (PC_PCBJKOKKOS *)pc->data;
1117 if (!jac->ksp) PetscCall(PCBJKOKKOSCreateKSP_BJKOKKOS(pc));
1118 *ksp = jac->ksp;
1153 PC_PCBJKOKKOS *jac = (PC_PCBJKOKKOS *)pc->data;
1157 ksp->its = jac->max_nits;
1163 PC_PCBJKOKKOS *jac = (PC_PCBJKOKKOS *)pc->data;
1167 jac->ksp->errorifnotconverged = ksp->errorifnotconverged;
1191 PC_PCBJKOKKOS *jac;
1194 PetscCall(PetscNew(&jac));
1195 pc->data = (void *)jac;
1197 jac->ksp = NULL;
1198 jac->vec_diag = NULL;
1199 jac->d_bid_eqOffset_k = NULL;
1200 jac->d_idiag_k = NULL;
1201 jac->d_isrow_k = NULL;
1202 jac->d_isicol_k = NULL;
1203 jac->nBlocks = 1;
1204 jac->max_nits = 0;
1217 jac->rowOffsets = NULL;
1218 jac->colIndices = NULL;
1219 jac->batch_b = NULL;
1220 jac->batch_x = NULL;
1221 jac->batch_values = NULL;