1ab5c6b0cSJed Brown #include <../src/ksp/pc/impls/bddc/bddc.h> 2ab5c6b0cSJed Brown #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3674ae819SStefano Zampini 4*34a97f8cSStefano Zampini /* prototypes for deluxe functions */ 5*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 6*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 7*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 8*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC,PetscInt,PetscInt,PetscInt[],PetscInt[]); 9*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC,PetscInt,PetscInt,PetscInt[],PetscInt[]); 10*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 11674ae819SStefano Zampini 12674ae819SStefano Zampini #undef __FUNCT__ 13674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingExtension_Basic" 14674ae819SStefano Zampini static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) 15674ae819SStefano Zampini { 16674ae819SStefano Zampini PC_IS* pcis = (PC_IS*)pc->data; 17674ae819SStefano Zampini PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 18674ae819SStefano Zampini PetscErrorCode ierr; 19674ae819SStefano Zampini 20674ae819SStefano Zampini PetscFunctionBegin; 21674ae819SStefano Zampini /* Apply partition of unity */ 22674ae819SStefano Zampini ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr); 23674ae819SStefano Zampini ierr = VecSet(global_vector,0.0);CHKERRQ(ierr); 24674ae819SStefano Zampini ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 25674ae819SStefano Zampini ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 26674ae819SStefano Zampini PetscFunctionReturn(0); 27674ae819SStefano Zampini } 28674ae819SStefano Zampini 29674ae819SStefano Zampini #undef __FUNCT__ 30674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingExtension_Deluxe" 31674ae819SStefano Zampini static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) 32674ae819SStefano Zampini { 33674ae819SStefano Zampini PC_IS* pcis=(PC_IS*)pc->data; 34674ae819SStefano Zampini PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 35674ae819SStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 36*34a97f8cSStefano Zampini PCBDDCSubSchurs sub_schurs = deluxe_ctx->sub_schurs; 37674ae819SStefano Zampini PetscInt i; 38674ae819SStefano Zampini PetscErrorCode ierr; 39674ae819SStefano Zampini 40674ae819SStefano Zampini /* TODO CHECK STUFF RELATED WITH FAKE WORK */ 41674ae819SStefano Zampini PetscFunctionBegin; 42*34a97f8cSStefano Zampini ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr); /* needed by the fake work below */ 43*34a97f8cSStefano Zampini if (deluxe_ctx->n_simple) { 44674ae819SStefano Zampini /* scale deluxe vertices using diagonal scaling */ 45*34a97f8cSStefano Zampini PetscScalar *array_x,*array_D,*array; 46674ae819SStefano Zampini ierr = VecGetArray(x,&array_x);CHKERRQ(ierr); 47674ae819SStefano Zampini ierr = VecGetArray(pcis->D,&array_D);CHKERRQ(ierr); 48674ae819SStefano Zampini ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 49674ae819SStefano Zampini for (i=0;i<deluxe_ctx->n_simple;i++) { 50674ae819SStefano Zampini array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]]; 51674ae819SStefano Zampini } 52674ae819SStefano Zampini ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr); 53674ae819SStefano Zampini ierr = VecRestoreArray(pcis->D,&array_D);CHKERRQ(ierr); 54674ae819SStefano Zampini ierr = VecRestoreArray(x,&array_x);CHKERRQ(ierr); 55*34a97f8cSStefano Zampini } 56*34a97f8cSStefano Zampini /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */ 57674ae819SStefano Zampini if (deluxe_ctx->seq_mat) { 58674ae819SStefano Zampini ierr = VecScatterBegin(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 59674ae819SStefano Zampini ierr = VecScatterEnd(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 60674ae819SStefano Zampini ierr = MatMult(deluxe_ctx->seq_mat,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr); 61674ae819SStefano Zampini ierr = KSPSolve(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr); 62674ae819SStefano Zampini /* fake work due to final ADD VALUES and vertices scaling needed? TODO: check it */ 63674ae819SStefano Zampini ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 64674ae819SStefano Zampini ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 65674ae819SStefano Zampini } 66674ae819SStefano Zampini /* parallel part */ 67674ae819SStefano Zampini for (i=0;i<deluxe_ctx->par_colors;i++) { 68674ae819SStefano Zampini if (deluxe_ctx->par_ksp[i]) { 69*34a97f8cSStefano Zampini PetscMPIInt color_rank; 70*34a97f8cSStefano Zampini PetscInt subidx = deluxe_ctx->par_col2sub[i]; 71*34a97f8cSStefano Zampini /* restrict on subset */ 72*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],x,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 73*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],x,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 74*34a97f8cSStefano Zampini /* S_Ej */ 75*34a97f8cSStefano Zampini ierr = MatMult(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr); 76*34a97f8cSStefano Zampini /* (\sum_j S_Ej)^-1 */ 77*34a97f8cSStefano Zampini ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr); 78*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 79*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 80674ae819SStefano Zampini ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr); 81*34a97f8cSStefano Zampini ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)deluxe_ctx->par_ksp[i]),&color_rank);CHKERRQ(ierr); 82*34a97f8cSStefano Zampini /* get back solution on subset */ 83*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 84*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 85*34a97f8cSStefano Zampini if (!color_rank) { /* only the master process in coloured comm copies the computed values */ 86*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],sub_schurs->work1[subidx],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 87*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],sub_schurs->work1[subidx],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 88674ae819SStefano Zampini } 89674ae819SStefano Zampini } 90674ae819SStefano Zampini } 91674ae819SStefano Zampini /* put local boundary part in global vector */ 92*34a97f8cSStefano Zampini ierr = VecSet(y,0.0);CHKERRQ(ierr); 93674ae819SStefano Zampini ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 94674ae819SStefano Zampini ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 95674ae819SStefano Zampini PetscFunctionReturn(0); 96674ae819SStefano Zampini } 97674ae819SStefano Zampini 98674ae819SStefano Zampini #undef __FUNCT__ 99674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingExtension" 100674ae819SStefano Zampini PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) 101674ae819SStefano Zampini { 102674ae819SStefano Zampini PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 103674ae819SStefano Zampini PetscErrorCode ierr; 104674ae819SStefano Zampini 105674ae819SStefano Zampini PetscFunctionBegin; 106674ae819SStefano Zampini PetscValidHeaderSpecific(pc,PC_CLASSID,1); 107674ae819SStefano Zampini PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2); 108674ae819SStefano Zampini PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3); 109674ae819SStefano Zampini if (local_interface_vector == pcbddc->work_scaling) { 110674ae819SStefano Zampini SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n"); 111674ae819SStefano Zampini } 112674ae819SStefano Zampini ierr = PetscTryMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr); 113674ae819SStefano Zampini PetscFunctionReturn(0); 114674ae819SStefano Zampini } 115674ae819SStefano Zampini 116674ae819SStefano Zampini #undef __FUNCT__ 117674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingRestriction_Basic" 118674ae819SStefano Zampini static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) 119674ae819SStefano Zampini { 120674ae819SStefano Zampini PetscErrorCode ierr; 121674ae819SStefano Zampini PC_IS* pcis = (PC_IS*)pc->data; 122674ae819SStefano Zampini 123674ae819SStefano Zampini PetscFunctionBegin; 124674ae819SStefano Zampini ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 125674ae819SStefano Zampini ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 126674ae819SStefano Zampini /* Apply partition of unity */ 127674ae819SStefano Zampini ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr); 128674ae819SStefano Zampini PetscFunctionReturn(0); 129674ae819SStefano Zampini } 130674ae819SStefano Zampini 131674ae819SStefano Zampini #undef __FUNCT__ 132674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingRestriction_Deluxe" 133674ae819SStefano Zampini static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) 134674ae819SStefano Zampini { 135674ae819SStefano Zampini PC_IS* pcis=(PC_IS*)pc->data; 136674ae819SStefano Zampini PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 137674ae819SStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 138*34a97f8cSStefano Zampini PCBDDCSubSchurs sub_schurs = deluxe_ctx->sub_schurs; 139674ae819SStefano Zampini PetscInt i; 140674ae819SStefano Zampini PetscErrorCode ierr; 141674ae819SStefano Zampini 142674ae819SStefano Zampini PetscFunctionBegin; 143674ae819SStefano Zampini /* get local boundary part of global vector */ 144674ae819SStefano Zampini ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 145674ae819SStefano Zampini ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 146*34a97f8cSStefano Zampini if (deluxe_ctx->n_simple) { 147*34a97f8cSStefano Zampini /* scale deluxe vertices using diagonal scaling */ 148*34a97f8cSStefano Zampini PetscScalar *array_y,*array_D; 149674ae819SStefano Zampini ierr = VecGetArray(y,&array_y);CHKERRQ(ierr); 150674ae819SStefano Zampini ierr = VecGetArray(pcis->D,&array_D);CHKERRQ(ierr); 151674ae819SStefano Zampini for (i=0;i<deluxe_ctx->n_simple;i++) { 152674ae819SStefano Zampini array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 153674ae819SStefano Zampini } 154674ae819SStefano Zampini ierr = VecRestoreArray(pcis->D,&array_D);CHKERRQ(ierr); 155674ae819SStefano Zampini ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr); 156*34a97f8cSStefano Zampini } 157*34a97f8cSStefano Zampini /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */ 158674ae819SStefano Zampini if (deluxe_ctx->seq_mat) { 159674ae819SStefano Zampini ierr = VecScatterBegin(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 160674ae819SStefano Zampini ierr = VecScatterEnd(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 161674ae819SStefano Zampini ierr = KSPSolveTranspose(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr); 162674ae819SStefano Zampini ierr = MatMultTranspose(deluxe_ctx->seq_mat,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr); 163674ae819SStefano Zampini ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 164674ae819SStefano Zampini ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 165674ae819SStefano Zampini } 166674ae819SStefano Zampini /* parallel part */ 167674ae819SStefano Zampini for (i=0;i<deluxe_ctx->par_colors;i++) { 168674ae819SStefano Zampini if (deluxe_ctx->par_ksp[i]) { 169*34a97f8cSStefano Zampini PetscInt subidx = deluxe_ctx->par_col2sub[i]; 170*34a97f8cSStefano Zampini /* restrict on subset */ 171*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],y,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 172*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],y,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 173*34a97f8cSStefano Zampini /* (\sum_j S_Ej)^-T */ 174*34a97f8cSStefano Zampini ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr); 175*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 176*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 177674ae819SStefano Zampini ierr = KSPSolveTranspose(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr); 178*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 179*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 180*34a97f8cSStefano Zampini /* S_Ej^T */ 181*34a97f8cSStefano Zampini ierr = MatMultTranspose(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr); 182*34a97f8cSStefano Zampini /* extend to boundary */ 183*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],sub_schurs->work2[subidx],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 184*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],sub_schurs->work2[subidx],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 185674ae819SStefano Zampini } 186674ae819SStefano Zampini } 187674ae819SStefano Zampini PetscFunctionReturn(0); 188674ae819SStefano Zampini } 189674ae819SStefano Zampini 190674ae819SStefano Zampini #undef __FUNCT__ 191674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingRestriction" 192674ae819SStefano Zampini PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) 193674ae819SStefano Zampini { 194674ae819SStefano Zampini PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 195674ae819SStefano Zampini PetscErrorCode ierr; 196674ae819SStefano Zampini 197674ae819SStefano Zampini PetscFunctionBegin; 198674ae819SStefano Zampini PetscValidHeaderSpecific(pc,PC_CLASSID,1); 199674ae819SStefano Zampini PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2); 200674ae819SStefano Zampini PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3); 201674ae819SStefano Zampini if (local_interface_vector == pcbddc->work_scaling) { 202674ae819SStefano Zampini SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector should cannot be pcbddc->work_scaling!\n"); 203674ae819SStefano Zampini } 204674ae819SStefano Zampini ierr = PetscTryMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr); 205674ae819SStefano Zampini PetscFunctionReturn(0); 206674ae819SStefano Zampini } 207674ae819SStefano Zampini 208674ae819SStefano Zampini #undef __FUNCT__ 209674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingSetUp" 210674ae819SStefano Zampini PetscErrorCode PCBDDCScalingSetUp(PC pc) 211674ae819SStefano Zampini { 212674ae819SStefano Zampini PC_IS* pcis=(PC_IS*)pc->data; 213674ae819SStefano Zampini PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 214674ae819SStefano Zampini PetscErrorCode ierr; 215674ae819SStefano Zampini 216674ae819SStefano Zampini PetscFunctionBegin; 217674ae819SStefano Zampini PetscValidHeaderSpecific(pc,PC_CLASSID,1); 218674ae819SStefano Zampini /* create work vector for the operator */ 219*34a97f8cSStefano Zampini ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 220674ae819SStefano Zampini ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr); 221*34a97f8cSStefano Zampini /* always rebuild pcis->D */ 22228d874f6SStefano Zampini if (pcis->use_stiffness_scaling) { 223674ae819SStefano Zampini ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr); 224674ae819SStefano Zampini ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 225674ae819SStefano Zampini ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 226674ae819SStefano Zampini } 227674ae819SStefano Zampini ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr); 228674ae819SStefano Zampini ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 229674ae819SStefano Zampini ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 230674ae819SStefano Zampini ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 231674ae819SStefano Zampini ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 232674ae819SStefano Zampini ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 233674ae819SStefano Zampini ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 234674ae819SStefano Zampini /* now setup */ 235681e7c04SStefano Zampini if (pcbddc->use_deluxe_scaling) { 236*34a97f8cSStefano Zampini if (!pcbddc->deluxe_ctx) { 237*34a97f8cSStefano Zampini ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr); 238*34a97f8cSStefano Zampini } 239*34a97f8cSStefano Zampini ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr); 240674ae819SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr); 241674ae819SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr); 242674ae819SStefano Zampini } else { 243674ae819SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr); 244674ae819SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr); 245674ae819SStefano Zampini } 246*34a97f8cSStefano Zampini 247674ae819SStefano Zampini /* test */ 248674ae819SStefano Zampini if (pcbddc->dbg_flag) { 249*34a97f8cSStefano Zampini Vec vec2_global; 250674ae819SStefano Zampini PetscViewer viewer=pcbddc->dbg_viewer; 251*34a97f8cSStefano Zampini PetscReal error; 252674ae819SStefano Zampini 253674ae819SStefano Zampini /* extension -> from local to parallel */ 254*34a97f8cSStefano Zampini ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 255*34a97f8cSStefano Zampini ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 256*34a97f8cSStefano Zampini ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 257*34a97f8cSStefano Zampini ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 258*34a97f8cSStefano Zampini ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr); 259*34a97f8cSStefano Zampini ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr); 260*34a97f8cSStefano Zampini 261674ae819SStefano Zampini ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 262674ae819SStefano Zampini ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 263674ae819SStefano Zampini ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr); 264*34a97f8cSStefano Zampini ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr); 265*34a97f8cSStefano Zampini ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 266674ae819SStefano Zampini ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr); 267*34a97f8cSStefano Zampini if (error>1.e-8 && pcbddc->dbg_flag>1) { 268674ae819SStefano Zampini ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr); 269674ae819SStefano Zampini } 270*34a97f8cSStefano Zampini ierr = VecDestroy(&vec2_global);CHKERRQ(ierr); 271*34a97f8cSStefano Zampini 272674ae819SStefano Zampini /* restriction -> from parallel to local */ 273674ae819SStefano Zampini ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 274*34a97f8cSStefano Zampini ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr); 275674ae819SStefano Zampini ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 276674ae819SStefano Zampini ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 277*34a97f8cSStefano Zampini 278*34a97f8cSStefano Zampini ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr); 279*34a97f8cSStefano Zampini ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr); 280*34a97f8cSStefano Zampini ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 281*34a97f8cSStefano Zampini ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 282*34a97f8cSStefano Zampini ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr); 283*34a97f8cSStefano Zampini ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr); 284*34a97f8cSStefano Zampini if (error>1.e-8 && pcbddc->dbg_flag>1) { 285674ae819SStefano Zampini ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr); 286674ae819SStefano Zampini } 287674ae819SStefano Zampini } 288674ae819SStefano Zampini PetscFunctionReturn(0); 289674ae819SStefano Zampini } 290674ae819SStefano Zampini 291674ae819SStefano Zampini #undef __FUNCT__ 292674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingDestroy" 293674ae819SStefano Zampini PetscErrorCode PCBDDCScalingDestroy(PC pc) 294674ae819SStefano Zampini { 295674ae819SStefano Zampini PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 296674ae819SStefano Zampini PetscErrorCode ierr; 297674ae819SStefano Zampini 298674ae819SStefano Zampini PetscFunctionBegin; 299*34a97f8cSStefano Zampini if (pcbddc->deluxe_ctx) { 300*34a97f8cSStefano Zampini ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr); 301674ae819SStefano Zampini } 302674ae819SStefano Zampini ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr); 303674ae819SStefano Zampini /* remove functions */ 304674ae819SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr); 305674ae819SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr); 306674ae819SStefano Zampini PetscFunctionReturn(0); 307674ae819SStefano Zampini } 308674ae819SStefano Zampini 309*34a97f8cSStefano Zampini #undef __FUNCT__ 310*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingCreate_Deluxe" 311*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) 312*34a97f8cSStefano Zampini { 313*34a97f8cSStefano Zampini PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 314*34a97f8cSStefano Zampini PCBDDCDeluxeScaling deluxe_ctx; 315*34a97f8cSStefano Zampini PetscErrorCode ierr; 316*34a97f8cSStefano Zampini 317*34a97f8cSStefano Zampini PetscFunctionBegin; 318*34a97f8cSStefano Zampini ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr); 319*34a97f8cSStefano Zampini ierr = PCBDDCSubSchursCreate(&deluxe_ctx->sub_schurs);CHKERRQ(ierr); 320*34a97f8cSStefano Zampini pcbddc->deluxe_ctx = deluxe_ctx; 321*34a97f8cSStefano Zampini PetscFunctionReturn(0); 322*34a97f8cSStefano Zampini } 323*34a97f8cSStefano Zampini 324*34a97f8cSStefano Zampini #undef __FUNCT__ 325*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingDestroy_Deluxe" 326*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) 327*34a97f8cSStefano Zampini { 328*34a97f8cSStefano Zampini PC_BDDC* pcbddc=(PC_BDDC*)pc->data; 329*34a97f8cSStefano Zampini PetscErrorCode ierr; 330*34a97f8cSStefano Zampini 331*34a97f8cSStefano Zampini PetscFunctionBegin; 332*34a97f8cSStefano Zampini ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr); 333*34a97f8cSStefano Zampini ierr = PCBDDCSubSchursDestroy(&(pcbddc->deluxe_ctx->sub_schurs));CHKERRQ(ierr); 334*34a97f8cSStefano Zampini ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr); 335*34a97f8cSStefano Zampini PetscFunctionReturn(0); 336*34a97f8cSStefano Zampini } 337*34a97f8cSStefano Zampini 338*34a97f8cSStefano Zampini #undef __FUNCT__ 339*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingReset_Deluxe_Solvers" 340*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) 341*34a97f8cSStefano Zampini { 342*34a97f8cSStefano Zampini PetscErrorCode ierr; 343*34a97f8cSStefano Zampini 344*34a97f8cSStefano Zampini PetscFunctionBegin; 345*34a97f8cSStefano Zampini ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 346*34a97f8cSStefano Zampini deluxe_ctx->n_simple = 0; 347*34a97f8cSStefano Zampini if (deluxe_ctx->seq_mat) { 348*34a97f8cSStefano Zampini ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx);CHKERRQ(ierr); 349*34a97f8cSStefano Zampini ierr = VecDestroy(&deluxe_ctx->seq_work1);CHKERRQ(ierr); 350*34a97f8cSStefano Zampini ierr = VecDestroy(&deluxe_ctx->seq_work2);CHKERRQ(ierr); 351*34a97f8cSStefano Zampini ierr = MatDestroy(&deluxe_ctx->seq_mat);CHKERRQ(ierr); 352*34a97f8cSStefano Zampini ierr = KSPDestroy(&deluxe_ctx->seq_ksp);CHKERRQ(ierr); 353*34a97f8cSStefano Zampini } 354*34a97f8cSStefano Zampini if (deluxe_ctx->par_colors) { 355*34a97f8cSStefano Zampini PetscInt i; 356*34a97f8cSStefano Zampini for (i=0;i<deluxe_ctx->par_colors;i++) { 357*34a97f8cSStefano Zampini ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr); 358*34a97f8cSStefano Zampini ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr); 359*34a97f8cSStefano Zampini ierr = VecDestroy(&deluxe_ctx->par_vec[i]);CHKERRQ(ierr); 360*34a97f8cSStefano Zampini ierr = KSPDestroy(&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr); 361*34a97f8cSStefano Zampini } 362*34a97f8cSStefano Zampini ierr = PetscFree5(deluxe_ctx->par_ksp, 363*34a97f8cSStefano Zampini deluxe_ctx->par_scctx_s, 364*34a97f8cSStefano Zampini deluxe_ctx->par_scctx_p, 365*34a97f8cSStefano Zampini deluxe_ctx->par_vec, 366*34a97f8cSStefano Zampini deluxe_ctx->par_col2sub);CHKERRQ(ierr); 367*34a97f8cSStefano Zampini } 368*34a97f8cSStefano Zampini deluxe_ctx->par_colors = 0; 369*34a97f8cSStefano Zampini PetscFunctionReturn(0); 370*34a97f8cSStefano Zampini } 371*34a97f8cSStefano Zampini 372*34a97f8cSStefano Zampini #undef __FUNCT__ 373*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe" 374*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) 375*34a97f8cSStefano Zampini { 376*34a97f8cSStefano Zampini PC_IS *pcis=(PC_IS*)pc->data; 377*34a97f8cSStefano Zampini PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 378*34a97f8cSStefano Zampini PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 379*34a97f8cSStefano Zampini PCBDDCSubSchurs sub_schurs=deluxe_ctx->sub_schurs; 380*34a97f8cSStefano Zampini PCBDDCGraph graph; 381*34a97f8cSStefano Zampini PetscInt *index_sequential,*index_parallel; 382*34a97f8cSStefano Zampini PetscInt *auxlocal_sequential,*auxlocal_parallel; 383*34a97f8cSStefano Zampini PetscInt *auxglobal_sequential,*auxglobal_parallel; 384*34a97f8cSStefano Zampini PetscInt *auxmapping; 385*34a97f8cSStefano Zampini PetscInt i,j,min_loc; 386*34a97f8cSStefano Zampini PetscInt n_sequential_problems,n_local_sequential_problems,n_parallel_problems,n_local_parallel_problems; 387*34a97f8cSStefano Zampini PetscErrorCode ierr; 388*34a97f8cSStefano Zampini 389*34a97f8cSStefano Zampini PetscFunctionBegin; 390*34a97f8cSStefano Zampini /* throw away the solvers */ 391*34a97f8cSStefano Zampini ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr); 392*34a97f8cSStefano Zampini 393*34a97f8cSStefano Zampini /* attach interface graph for determining subsets */ 394*34a97f8cSStefano Zampini if (pcbddc->deluxe_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 395*34a97f8cSStefano Zampini PetscInt *idx_V_N; 396*34a97f8cSStefano Zampini IS verticesIS; 397*34a97f8cSStefano Zampini ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&idx_V_N);CHKERRQ(ierr); 398*34a97f8cSStefano Zampini ierr = ISCreateGeneral(PETSC_COMM_SELF,i,idx_V_N,PETSC_OWN_POINTER,&verticesIS);CHKERRQ(ierr); 399*34a97f8cSStefano Zampini ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 400*34a97f8cSStefano Zampini ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 401*34a97f8cSStefano Zampini ierr = PCBDDCGraphSetUp(graph,0,PETSC_NULL,pcbddc->DirichletBoundaries,0,PETSC_NULL,verticesIS);CHKERRQ(ierr); 402*34a97f8cSStefano Zampini ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 403*34a97f8cSStefano Zampini ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 404*34a97f8cSStefano Zampini /* 405*34a97f8cSStefano Zampini if (pcbddc->dbg_flag) { 406*34a97f8cSStefano Zampini ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 407*34a97f8cSStefano Zampini } 408*34a97f8cSStefano Zampini */ 409*34a97f8cSStefano Zampini } else { 410*34a97f8cSStefano Zampini graph = pcbddc->mat_graph; 411*34a97f8cSStefano Zampini } 412*34a97f8cSStefano Zampini 413*34a97f8cSStefano Zampini /* map interface's subsets */ 414*34a97f8cSStefano Zampini j = 0; 415*34a97f8cSStefano Zampini for (i=0;i<graph->ncc;i++) { 416*34a97f8cSStefano Zampini j = PetscMax(j,graph->cptr[i+1]-graph->cptr[i]); 417*34a97f8cSStefano Zampini } 418*34a97f8cSStefano Zampini ierr = PetscMalloc5(j,&auxmapping, 419*34a97f8cSStefano Zampini graph->ncc,&auxlocal_sequential, 420*34a97f8cSStefano Zampini graph->ncc,&auxlocal_parallel, 421*34a97f8cSStefano Zampini graph->ncc,&index_sequential, 422*34a97f8cSStefano Zampini graph->ncc,&index_parallel);CHKERRQ(ierr); 423*34a97f8cSStefano Zampini 424*34a97f8cSStefano Zampini n_local_sequential_problems = 0; 425*34a97f8cSStefano Zampini n_local_parallel_problems = 0; 426*34a97f8cSStefano Zampini deluxe_ctx->n_simple = 0; 427*34a97f8cSStefano Zampini for (i=0;i<graph->ncc;i++) { 428*34a97f8cSStefano Zampini PetscInt subset_size = graph->cptr[i+1]-graph->cptr[i]; 429*34a97f8cSStefano Zampini if (subset_size > 1) { 430*34a97f8cSStefano Zampini ierr = ISLocalToGlobalMappingApply(graph->l2gmap,subset_size,&graph->queue[graph->cptr[i]],auxmapping);CHKERRQ(ierr); 431*34a97f8cSStefano Zampini min_loc = 0; 432*34a97f8cSStefano Zampini for (j=1;j<subset_size;j++) { 433*34a97f8cSStefano Zampini if (auxmapping[j]<auxmapping[min_loc]) { 434*34a97f8cSStefano Zampini min_loc = j; 435*34a97f8cSStefano Zampini } 436*34a97f8cSStefano Zampini } 437*34a97f8cSStefano Zampini if (subset_size > pcbddc->deluxe_threshold) { 438*34a97f8cSStefano Zampini index_parallel[n_local_parallel_problems] = i; 439*34a97f8cSStefano Zampini auxlocal_parallel[n_local_parallel_problems] = graph->queue[graph->cptr[i]+min_loc]; 440*34a97f8cSStefano Zampini n_local_parallel_problems++; 441*34a97f8cSStefano Zampini } else { 442*34a97f8cSStefano Zampini index_sequential[n_local_sequential_problems] = i; 443*34a97f8cSStefano Zampini auxlocal_sequential[n_local_sequential_problems] = graph->queue[graph->cptr[i]+min_loc]; 444*34a97f8cSStefano Zampini n_local_sequential_problems++; 445*34a97f8cSStefano Zampini } 446*34a97f8cSStefano Zampini } else { 447*34a97f8cSStefano Zampini deluxe_ctx->n_simple++; 448*34a97f8cSStefano Zampini } 449*34a97f8cSStefano Zampini } 450*34a97f8cSStefano Zampini 451*34a97f8cSStefano Zampini /* diagonal scaling on size 1 cc and on dirichlet boundary dofs */ 452*34a97f8cSStefano Zampini for (i=0;i<graph->nvtxs;i++) { 453*34a97f8cSStefano Zampini if (graph->count[i] && graph->special_dof[i] == PCBDDCGRAPH_DIRICHLET_MARK) { 454*34a97f8cSStefano Zampini deluxe_ctx->n_simple++; 455*34a97f8cSStefano Zampini } 456*34a97f8cSStefano Zampini } 457*34a97f8cSStefano Zampini ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 458*34a97f8cSStefano Zampini deluxe_ctx->n_simple = 0; 459*34a97f8cSStefano Zampini for (i=0;i<graph->ncc;i++) { 460*34a97f8cSStefano Zampini if (graph->cptr[i+1]-graph->cptr[i]==1) { 461*34a97f8cSStefano Zampini deluxe_ctx->idx_simple_B[deluxe_ctx->n_simple++] = graph->queue[graph->cptr[i]]; 462*34a97f8cSStefano Zampini } 463*34a97f8cSStefano Zampini } 464*34a97f8cSStefano Zampini for (i=0;i<graph->nvtxs;i++) { 465*34a97f8cSStefano Zampini if (graph->count[i] && graph->special_dof[i] == PCBDDCGRAPH_DIRICHLET_MARK) { 466*34a97f8cSStefano Zampini deluxe_ctx->idx_simple_B[deluxe_ctx->n_simple++] = i; 467*34a97f8cSStefano Zampini } 468*34a97f8cSStefano Zampini } 469*34a97f8cSStefano Zampini ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B,&i,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 470*34a97f8cSStefano Zampini if (i != deluxe_ctx->n_simple) { 471*34a97f8cSStefano Zampini SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simple scaling dofs! %d != %d",i,deluxe_ctx->n_simple); 472*34a97f8cSStefano Zampini } 473*34a97f8cSStefano Zampini ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr); 474*34a97f8cSStefano Zampini 475*34a97f8cSStefano Zampini /* SetUp local schur complements on subsets TODO better reuse procedure */ 476*34a97f8cSStefano Zampini if (!sub_schurs->n_subs) { 477*34a97f8cSStefano Zampini Mat S_j; 478*34a97f8cSStefano Zampini PetscBool free_used_adj; 479*34a97f8cSStefano Zampini PetscInt *used_xadj,*used_adjncy; 480*34a97f8cSStefano Zampini 481*34a97f8cSStefano Zampini /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 482*34a97f8cSStefano Zampini free_used_adj = PETSC_FALSE; 483*34a97f8cSStefano Zampini if (pcbddc->deluxe_layers == -1) { 484*34a97f8cSStefano Zampini used_xadj = NULL; 485*34a97f8cSStefano Zampini used_adjncy = NULL; 486*34a97f8cSStefano Zampini } else { 487*34a97f8cSStefano Zampini if ((pcbddc->deluxe_use_useradj && pcbddc->mat_graph->xadj) || !pcbddc->deluxe_compute_rowadj) { 488*34a97f8cSStefano Zampini used_xadj = pcbddc->mat_graph->xadj; 489*34a97f8cSStefano Zampini used_adjncy = pcbddc->mat_graph->adjncy; 490*34a97f8cSStefano Zampini } else { 491*34a97f8cSStefano Zampini Mat mat_adj; 492*34a97f8cSStefano Zampini PetscBool flg_row=PETSC_TRUE; 493*34a97f8cSStefano Zampini const PetscInt *xadj,*adjncy; 494*34a97f8cSStefano Zampini PetscInt nvtxs; 495*34a97f8cSStefano Zampini 496*34a97f8cSStefano Zampini ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 497*34a97f8cSStefano Zampini ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 498*34a97f8cSStefano Zampini if (!flg_row) { 499*34a97f8cSStefano Zampini SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 500*34a97f8cSStefano Zampini } 501*34a97f8cSStefano Zampini ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 502*34a97f8cSStefano Zampini ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 503*34a97f8cSStefano Zampini ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 504*34a97f8cSStefano Zampini ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 505*34a97f8cSStefano Zampini if (!flg_row) { 506*34a97f8cSStefano Zampini SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 507*34a97f8cSStefano Zampini } 508*34a97f8cSStefano Zampini ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 509*34a97f8cSStefano Zampini free_used_adj = PETSC_TRUE; 510*34a97f8cSStefano Zampini } 511*34a97f8cSStefano Zampini } 512*34a97f8cSStefano Zampini 513*34a97f8cSStefano Zampini /* Create Schur complement matrix */ 514*34a97f8cSStefano Zampini ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 515*34a97f8cSStefano Zampini ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 516*34a97f8cSStefano Zampini /* setup Schur complements on subsets */ 517*34a97f8cSStefano Zampini ierr = PCBDDCSubSchursSetUp(sub_schurs,S_j,pcis->is_I_local,pcis->is_B_local,graph->ncc,graph->cptr,graph->queue,used_xadj,used_adjncy,PETSC_TRUE,pcbddc->deluxe_layers);CHKERRQ(ierr); 518*34a97f8cSStefano Zampini ierr = MatDestroy(&S_j);CHKERRQ(ierr); 519*34a97f8cSStefano Zampini /* free adjacency */ 520*34a97f8cSStefano Zampini if (free_used_adj) { 521*34a97f8cSStefano Zampini ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 522*34a97f8cSStefano Zampini } 523*34a97f8cSStefano Zampini } 524*34a97f8cSStefano Zampini 525*34a97f8cSStefano Zampini /* Number parallel problems */ 526*34a97f8cSStefano Zampini auxglobal_parallel = 0; 527*34a97f8cSStefano Zampini ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),graph->l2gmap,n_local_parallel_problems,auxlocal_parallel,PETSC_NULL,&n_parallel_problems,&auxglobal_parallel);CHKERRQ(ierr); 528*34a97f8cSStefano Zampini if (pcbddc->dbg_flag) { 529*34a97f8cSStefano Zampini ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Deluxe global number of parallel subproblems: %d\n",n_parallel_problems); 530*34a97f8cSStefano Zampini } 531*34a97f8cSStefano Zampini 532*34a97f8cSStefano Zampini /* Compute data structures to solve parallel problems */ 533*34a97f8cSStefano Zampini ierr = PCBDDCScalingSetUp_Deluxe_Par(pc,n_local_parallel_problems,n_parallel_problems,auxglobal_parallel,index_parallel);CHKERRQ(ierr); 534*34a97f8cSStefano Zampini ierr = PetscFree(auxglobal_parallel);CHKERRQ(ierr); 535*34a97f8cSStefano Zampini 536*34a97f8cSStefano Zampini 537*34a97f8cSStefano Zampini /* Number sequential problems */ 538*34a97f8cSStefano Zampini auxglobal_sequential = 0; 539*34a97f8cSStefano Zampini ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),graph->l2gmap,n_local_sequential_problems,auxlocal_sequential,PETSC_NULL,&n_sequential_problems,&auxglobal_sequential);CHKERRQ(ierr); 540*34a97f8cSStefano Zampini if (pcbddc->dbg_flag) { 541*34a97f8cSStefano Zampini ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Deluxe global number of sequential subproblems: %d\n",n_sequential_problems); 542*34a97f8cSStefano Zampini } 543*34a97f8cSStefano Zampini 544*34a97f8cSStefano Zampini /* Compute data structures to solve sequential problems */ 545*34a97f8cSStefano Zampini ierr = PCBDDCScalingSetUp_Deluxe_Seq(pc,n_local_sequential_problems,n_sequential_problems,auxglobal_sequential,index_sequential);CHKERRQ(ierr); 546*34a97f8cSStefano Zampini ierr = PetscFree(auxglobal_sequential);CHKERRQ(ierr); 547*34a97f8cSStefano Zampini 548*34a97f8cSStefano Zampini /* free workspace */ 549*34a97f8cSStefano Zampini ierr = PetscFree5(auxmapping,auxlocal_sequential,auxlocal_parallel,index_sequential,index_parallel);CHKERRQ(ierr); 550*34a97f8cSStefano Zampini 551*34a97f8cSStefano Zampini /* free graph struct */ 552*34a97f8cSStefano Zampini if (pcbddc->deluxe_rebuild) { 553*34a97f8cSStefano Zampini ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 554*34a97f8cSStefano Zampini } 555*34a97f8cSStefano Zampini PetscFunctionReturn(0); 556*34a97f8cSStefano Zampini } 557*34a97f8cSStefano Zampini 558*34a97f8cSStefano Zampini #undef __FUNCT__ 559*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Par" 560*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC pc, PetscInt n_local_parallel_problems,PetscInt n_parallel_problems,PetscInt global_parallel[],PetscInt index_parallel[]) 561*34a97f8cSStefano Zampini { 562*34a97f8cSStefano Zampini PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 563*34a97f8cSStefano Zampini PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 564*34a97f8cSStefano Zampini /* coloring */ 565*34a97f8cSStefano Zampini Mat parallel_problems; 566*34a97f8cSStefano Zampini MatColoring coloring_obj; 567*34a97f8cSStefano Zampini ISColoring coloring_parallel_problems; 568*34a97f8cSStefano Zampini IS *par_is_colors,*is_colors; 569*34a97f8cSStefano Zampini /* working stuff */ 570*34a97f8cSStefano Zampini PetscInt i,j; 571*34a97f8cSStefano Zampini PetscErrorCode ierr; 572*34a97f8cSStefano Zampini 573*34a97f8cSStefano Zampini PetscFunctionBegin; 574*34a97f8cSStefano Zampini if (!n_parallel_problems) { 575*34a97f8cSStefano Zampini PetscFunctionReturn(0); 576*34a97f8cSStefano Zampini } 577*34a97f8cSStefano Zampini /* Color parallel subproblems */ 578*34a97f8cSStefano Zampini ierr = MatCreate(PetscObjectComm((PetscObject)pc),¶llel_problems);CHKERRQ(ierr); 579*34a97f8cSStefano Zampini ierr = MatSetSizes(parallel_problems,PETSC_DECIDE,PETSC_DECIDE,n_parallel_problems,n_parallel_problems);CHKERRQ(ierr); 580*34a97f8cSStefano Zampini ierr = MatSetType(parallel_problems,MATAIJ);CHKERRQ(ierr); 581*34a97f8cSStefano Zampini ierr = MatSetUp(parallel_problems);CHKERRQ(ierr); 582*34a97f8cSStefano Zampini ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 583*34a97f8cSStefano Zampini ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 584*34a97f8cSStefano Zampini for (i=0;i<n_local_parallel_problems;i++) { 585*34a97f8cSStefano Zampini PetscInt row = global_parallel[i]; 586*34a97f8cSStefano Zampini for (j=0;j<n_local_parallel_problems;j++) { 587*34a97f8cSStefano Zampini PetscInt col = global_parallel[j]; 588*34a97f8cSStefano Zampini if (row != col) { 589*34a97f8cSStefano Zampini ierr = MatSetValue(parallel_problems,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 590*34a97f8cSStefano Zampini } 591*34a97f8cSStefano Zampini } 592*34a97f8cSStefano Zampini } 593*34a97f8cSStefano Zampini ierr = MatAssemblyBegin(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 594*34a97f8cSStefano Zampini ierr = MatAssemblyEnd(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 595*34a97f8cSStefano Zampini if (pcbddc->dbg_flag > 1) { 596*34a97f8cSStefano Zampini ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 597*34a97f8cSStefano Zampini ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Adj matrix for deluxe parallel problems\n");CHKERRQ(ierr); 598*34a97f8cSStefano Zampini ierr = MatView(parallel_problems,pcbddc->dbg_viewer);CHKERRQ(ierr); 599*34a97f8cSStefano Zampini } 600*34a97f8cSStefano Zampini ierr = MatColoringCreate(parallel_problems,&coloring_obj);CHKERRQ(ierr); 601*34a97f8cSStefano Zampini ierr = MatColoringSetDistance(coloring_obj,1);CHKERRQ(ierr); 602*34a97f8cSStefano Zampini ierr = MatColoringSetType(coloring_obj,MATCOLORINGJP);CHKERRQ(ierr); 603*34a97f8cSStefano Zampini ierr = MatColoringApply(coloring_obj,&coloring_parallel_problems);CHKERRQ(ierr); 604*34a97f8cSStefano Zampini ierr = ISColoringGetIS(coloring_parallel_problems,&deluxe_ctx->par_colors,&par_is_colors);CHKERRQ(ierr); 605*34a97f8cSStefano Zampini if (pcbddc->dbg_flag) { 606*34a97f8cSStefano Zampini ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 607*34a97f8cSStefano Zampini ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Number of colors %d for parallel part of deluxe\n",deluxe_ctx->par_colors);CHKERRQ(ierr); 608*34a97f8cSStefano Zampini } 609*34a97f8cSStefano Zampini 610*34a97f8cSStefano Zampini /* all procs should know the color distribution */ 611*34a97f8cSStefano Zampini ierr = PetscMalloc1(deluxe_ctx->par_colors,&is_colors);CHKERRQ(ierr); 612*34a97f8cSStefano Zampini for (i=0;i<deluxe_ctx->par_colors;i++) { 613*34a97f8cSStefano Zampini if (pcbddc->dbg_flag) { 614*34a97f8cSStefano Zampini ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Global problem indexes for color %d\n",i);CHKERRQ(ierr); 615*34a97f8cSStefano Zampini ierr = ISView(par_is_colors[i],pcbddc->dbg_viewer);CHKERRQ(ierr); 616*34a97f8cSStefano Zampini ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 617*34a97f8cSStefano Zampini } 618*34a97f8cSStefano Zampini ierr = ISAllGather(par_is_colors[i],&is_colors[i]);CHKERRQ(ierr); 619*34a97f8cSStefano Zampini } 620*34a97f8cSStefano Zampini 621*34a97f8cSStefano Zampini /* free unneeded objects */ 622*34a97f8cSStefano Zampini ierr = ISColoringRestoreIS(coloring_parallel_problems,&par_is_colors);CHKERRQ(ierr); 623*34a97f8cSStefano Zampini ierr = ISColoringDestroy(&coloring_parallel_problems);CHKERRQ(ierr); 624*34a97f8cSStefano Zampini ierr = MatColoringDestroy(&coloring_obj);CHKERRQ(ierr); 625*34a97f8cSStefano Zampini ierr = MatDestroy(¶llel_problems);CHKERRQ(ierr); 626*34a97f8cSStefano Zampini 627*34a97f8cSStefano Zampini /* allocate deluxe arrays for parallel problems */ 628*34a97f8cSStefano Zampini ierr = PetscMalloc5(deluxe_ctx->par_colors,&deluxe_ctx->par_ksp, 629*34a97f8cSStefano Zampini deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_s, 630*34a97f8cSStefano Zampini deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_p, 631*34a97f8cSStefano Zampini deluxe_ctx->par_colors,&deluxe_ctx->par_vec, 632*34a97f8cSStefano Zampini deluxe_ctx->par_colors,&deluxe_ctx->par_col2sub);CHKERRQ(ierr); 633*34a97f8cSStefano Zampini 634*34a97f8cSStefano Zampini /* cycle on colors */ 635*34a97f8cSStefano Zampini for (i=0;i<deluxe_ctx->par_colors;i++) { 636*34a97f8cSStefano Zampini PetscSubcomm par_subcomm; 637*34a97f8cSStefano Zampini const PetscInt* idxs_subproblems; 638*34a97f8cSStefano Zampini PetscInt color_size; 639*34a97f8cSStefano Zampini PetscMPIInt rank,active_color; 640*34a97f8cSStefano Zampini 641*34a97f8cSStefano Zampini /* get local index of i-th parallel colored problem */ 642*34a97f8cSStefano Zampini ierr = ISGetLocalSize(is_colors[i],&color_size);CHKERRQ(ierr); 643*34a97f8cSStefano Zampini ierr = ISGetIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr); 644*34a97f8cSStefano Zampini /* split comm for computing parallel problems for this color */ 645*34a97f8cSStefano Zampini /* Processes not partecipating at this stage will have color = color_size */ 646*34a97f8cSStefano Zampini /* because PetscCommDuplicate does not handle MPI_COMM_NULL */ 647*34a97f8cSStefano Zampini active_color = color_size; 648*34a97f8cSStefano Zampini deluxe_ctx->par_col2sub[i] = -1; 649*34a97f8cSStefano Zampini for (j=0;j<n_local_parallel_problems;j++) { 650*34a97f8cSStefano Zampini PetscInt local_idx; 651*34a97f8cSStefano Zampini ierr = PetscFindInt(global_parallel[j],color_size,idxs_subproblems,&local_idx);CHKERRQ(ierr); 652*34a97f8cSStefano Zampini if (local_idx > -1) { 653*34a97f8cSStefano Zampini ierr = PetscMPIIntCast(local_idx,&active_color);CHKERRQ(ierr); 654*34a97f8cSStefano Zampini deluxe_ctx->par_col2sub[i] = index_parallel[j]; 655*34a97f8cSStefano Zampini break; 656*34a97f8cSStefano Zampini } 657*34a97f8cSStefano Zampini } 658*34a97f8cSStefano Zampini ierr = ISRestoreIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr); 659*34a97f8cSStefano Zampini ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)pc),&par_subcomm);CHKERRQ(ierr); 660*34a97f8cSStefano Zampini ierr = PetscSubcommSetNumber(par_subcomm,color_size+1);CHKERRQ(ierr); 661*34a97f8cSStefano Zampini ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 662*34a97f8cSStefano Zampini ierr = PetscSubcommSetTypeGeneral(par_subcomm,active_color,rank);CHKERRQ(ierr); 663*34a97f8cSStefano Zampini /* print debug info */ 664*34a97f8cSStefano Zampini if (pcbddc->dbg_flag) { 665*34a97f8cSStefano Zampini PetscMPIInt crank,csize; 666*34a97f8cSStefano Zampini ierr = MPI_Comm_rank(par_subcomm->comm,&crank);CHKERRQ(ierr); 667*34a97f8cSStefano Zampini ierr = MPI_Comm_size(par_subcomm->comm,&csize);CHKERRQ(ierr); 668*34a97f8cSStefano Zampini ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Color %d: size %d, details follows.\n",i,color_size);CHKERRQ(ierr); 669*34a97f8cSStefano Zampini ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 670*34a97f8cSStefano Zampini ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 671*34a97f8cSStefano Zampini ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Subdomain %d: color in subcomm %d (rank %d out of %d) (lidx %d)\n",PetscGlobalRank,par_subcomm->color,crank,csize,deluxe_ctx->par_col2sub[i]);CHKERRQ(ierr); 672*34a97f8cSStefano Zampini ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 673*34a97f8cSStefano Zampini } 674*34a97f8cSStefano Zampini 675*34a97f8cSStefano Zampini if (deluxe_ctx->par_col2sub[i] >= 0) { 676*34a97f8cSStefano Zampini PC pc; 677*34a97f8cSStefano Zampini Mat color_mat,color_mat_is,temp_mat; 678*34a97f8cSStefano Zampini ISLocalToGlobalMapping WtoNmap,l2gmap_subset; 679*34a97f8cSStefano Zampini IS is_local_numbering,isB_local,isW_local,isW; 680*34a97f8cSStefano Zampini PCBDDCSubSchurs sub_schurs = deluxe_ctx->sub_schurs; 681*34a97f8cSStefano Zampini PetscInt subidx,n_local_dofs,n_global_dofs; 682*34a97f8cSStefano Zampini PetscInt *global_numbering,*local_numbering; 683*34a97f8cSStefano Zampini char ksp_prefix[256]; 684*34a97f8cSStefano Zampini size_t len; 685*34a97f8cSStefano Zampini 686*34a97f8cSStefano Zampini /* Local index for schur complement on subset */ 687*34a97f8cSStefano Zampini subidx = deluxe_ctx->par_col2sub[i]; 688*34a97f8cSStefano Zampini 689*34a97f8cSStefano Zampini /* Parallel numbering for dofs in colored subset */ 690*34a97f8cSStefano Zampini ierr = ISSum(sub_schurs->is_AEj_I[subidx],sub_schurs->is_AEj_B[subidx],&is_local_numbering);CHKERRQ(ierr); 691*34a97f8cSStefano Zampini ierr = ISGetLocalSize(is_local_numbering,&n_local_dofs);CHKERRQ(ierr); 692*34a97f8cSStefano Zampini ierr = ISGetIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr); 693*34a97f8cSStefano Zampini ierr = PCBDDCSubsetNumbering(par_subcomm->comm,pcbddc->mat_graph->l2gmap,n_local_dofs,local_numbering,PETSC_NULL,&n_global_dofs,&global_numbering);CHKERRQ(ierr); 694*34a97f8cSStefano Zampini ierr = ISRestoreIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr); 695*34a97f8cSStefano Zampini 696*34a97f8cSStefano Zampini /* L2Gmap from relevant dofs to local dofs */ 697*34a97f8cSStefano Zampini ierr = ISLocalToGlobalMappingCreateIS(is_local_numbering,&WtoNmap);CHKERRQ(ierr); 698*34a97f8cSStefano Zampini 699*34a97f8cSStefano Zampini /* L2Gmap from local to global dofs */ 700*34a97f8cSStefano Zampini ierr = ISLocalToGlobalMappingCreate(par_subcomm->comm,1,n_local_dofs,global_numbering,PETSC_COPY_VALUES,&l2gmap_subset);CHKERRQ(ierr); 701*34a97f8cSStefano Zampini 702*34a97f8cSStefano Zampini /* compute parallel matrix (extended dirichlet problem on subset) */ 703*34a97f8cSStefano Zampini ierr = MatCreateIS(par_subcomm->comm,1,PETSC_DECIDE,PETSC_DECIDE,n_global_dofs,n_global_dofs,l2gmap_subset,&color_mat_is);CHKERRQ(ierr); 704*34a97f8cSStefano Zampini ierr = MatGetSubMatrix(pcbddc->local_mat,is_local_numbering,is_local_numbering,MAT_INITIAL_MATRIX,&temp_mat);CHKERRQ(ierr); 705*34a97f8cSStefano Zampini ierr = MatISSetLocalMat(color_mat_is,temp_mat);CHKERRQ(ierr); 706*34a97f8cSStefano Zampini ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 707*34a97f8cSStefano Zampini ierr = MatISGetMPIXAIJ(color_mat_is,MAT_INITIAL_MATRIX,&color_mat);CHKERRQ(ierr); 708*34a97f8cSStefano Zampini ierr = MatDestroy(&color_mat_is);CHKERRQ(ierr); 709*34a97f8cSStefano Zampini 710*34a97f8cSStefano Zampini /* work vector for (parallel) extended dirichlet problem */ 711*34a97f8cSStefano Zampini ierr = MatGetVecs(color_mat,&deluxe_ctx->par_vec[i],NULL);CHKERRQ(ierr); 712*34a97f8cSStefano Zampini 713*34a97f8cSStefano Zampini /* compute scatters */ 714*34a97f8cSStefano Zampini /* deluxe_ctx->par_scctx_p[i] extension from local subset to extended dirichlet problem 715*34a97f8cSStefano Zampini deluxe_ctx->par_scctx_s[i] restriction from local boundary to subset -> simple copy of selected values */ 716*34a97f8cSStefano Zampini ierr = ISGlobalToLocalMappingApplyIS(pcbddc->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[subidx],&isB_local);CHKERRQ(ierr); 717*34a97f8cSStefano Zampini ierr = VecScatterCreate(pcbddc->work_scaling,isB_local,sub_schurs->work1[subidx],NULL,&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr); 718*34a97f8cSStefano Zampini ierr = ISGlobalToLocalMappingApplyIS(WtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[subidx],&isW_local);CHKERRQ(ierr); 719*34a97f8cSStefano Zampini ierr = ISLocalToGlobalMappingApplyIS(l2gmap_subset,isW_local,&isW);CHKERRQ(ierr); 720*34a97f8cSStefano Zampini ierr = VecScatterCreate(sub_schurs->work1[subidx],NULL,deluxe_ctx->par_vec[i],isW,&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr); 721*34a97f8cSStefano Zampini 722*34a97f8cSStefano Zampini /* free objects no longer neeeded */ 723*34a97f8cSStefano Zampini ierr = ISDestroy(&isW);CHKERRQ(ierr); 724*34a97f8cSStefano Zampini ierr = ISDestroy(&isW_local);CHKERRQ(ierr); 725*34a97f8cSStefano Zampini ierr = ISDestroy(&isB_local);CHKERRQ(ierr); 726*34a97f8cSStefano Zampini ierr = ISLocalToGlobalMappingDestroy(&WtoNmap);CHKERRQ(ierr); 727*34a97f8cSStefano Zampini ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subset);CHKERRQ(ierr); 728*34a97f8cSStefano Zampini ierr = ISDestroy(&is_local_numbering);CHKERRQ(ierr); 729*34a97f8cSStefano Zampini ierr = PetscFree(global_numbering);CHKERRQ(ierr); 730*34a97f8cSStefano Zampini 731*34a97f8cSStefano Zampini /* KSP for extended dirichlet problem */ 732*34a97f8cSStefano Zampini ierr = KSPCreate(par_subcomm->comm,&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr); 733*34a97f8cSStefano Zampini ierr = KSPSetOperators(deluxe_ctx->par_ksp[i],color_mat,color_mat);CHKERRQ(ierr); 734*34a97f8cSStefano Zampini ierr = KSPSetTolerances(deluxe_ctx->par_ksp[i],1.e-12,1.e-12,1.e10,10000);CHKERRQ(ierr); 735*34a97f8cSStefano Zampini ierr = KSPSetType(deluxe_ctx->par_ksp[i],KSPPREONLY);CHKERRQ(ierr); 736*34a97f8cSStefano Zampini ierr = KSPGetPC(deluxe_ctx->par_ksp[i],&pc);CHKERRQ(ierr); 737*34a97f8cSStefano Zampini ierr = PCSetType(pc,PCREDUNDANT);CHKERRQ(ierr); 738*34a97f8cSStefano Zampini ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr); 739*34a97f8cSStefano Zampini //ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] PREFIX DIR %d is \"%s\"\n",PetscGlobalRank,len,((PetscObject)(pcbddc->ksp_D))->prefix);CHKERRQ(ierr); 740*34a97f8cSStefano Zampini len -= 10; /* remove "dirichlet_" TODO CHECK WITH VALGRIND*/ 741*34a97f8cSStefano Zampini ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr); 742*34a97f8cSStefano Zampini //ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len);CHKERRQ(ierr); 743*34a97f8cSStefano Zampini //ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] PREFIX TMP %d is \"%s\"\n",PetscGlobalRank,len,ksp_prefix);CHKERRQ(ierr); 744*34a97f8cSStefano Zampini ierr = PetscStrcat(ksp_prefix,"deluxe_par_");CHKERRQ(ierr); 745*34a97f8cSStefano Zampini ierr = KSPSetOptionsPrefix(deluxe_ctx->par_ksp[i],ksp_prefix);CHKERRQ(ierr); 746*34a97f8cSStefano Zampini ierr = KSPSetFromOptions(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr); 747*34a97f8cSStefano Zampini ierr = KSPSetUp(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr); 748*34a97f8cSStefano Zampini ierr = MatDestroy(&color_mat);CHKERRQ(ierr); 749*34a97f8cSStefano Zampini //ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] PREFIX PAR %d is \"%s\"\n",PetscGlobalRank,len,((PetscObject)deluxe_ctx->par_ksp[i])->prefix);CHKERRQ(ierr); 750*34a97f8cSStefano Zampini } else { /* not partecipating in color */ 751*34a97f8cSStefano Zampini deluxe_ctx->par_ksp[i] = 0; 752*34a97f8cSStefano Zampini deluxe_ctx->par_vec[i] = 0; 753*34a97f8cSStefano Zampini deluxe_ctx->par_scctx_p[i] = 0; 754*34a97f8cSStefano Zampini deluxe_ctx->par_scctx_s[i] = 0; 755*34a97f8cSStefano Zampini } 756*34a97f8cSStefano Zampini ierr = PetscSubcommDestroy(&par_subcomm);CHKERRQ(ierr); 757*34a97f8cSStefano Zampini } 758*34a97f8cSStefano Zampini for (i=0;i<deluxe_ctx->par_colors;i++) { 759*34a97f8cSStefano Zampini ierr = ISDestroy(&is_colors[i]);CHKERRQ(ierr); 760*34a97f8cSStefano Zampini } 761*34a97f8cSStefano Zampini ierr = PetscFree(is_colors);CHKERRQ(ierr); 762*34a97f8cSStefano Zampini 763*34a97f8cSStefano Zampini if (pcbddc->dbg_flag) { 764*34a97f8cSStefano Zampini Vec test_vec; 765*34a97f8cSStefano Zampini PetscReal error; 766*34a97f8cSStefano Zampini PCBDDCSubSchurs sub_schurs = deluxe_ctx->sub_schurs; 767*34a97f8cSStefano Zampini /* test partition of unity of coloured schur complements */ 768*34a97f8cSStefano Zampini for (i=0;i<deluxe_ctx->par_colors;i++) { 769*34a97f8cSStefano Zampini PetscInt subidx = deluxe_ctx->par_col2sub[i]; 770*34a97f8cSStefano Zampini PetscBool error_found = PETSC_FALSE; 771*34a97f8cSStefano Zampini ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 772*34a97f8cSStefano Zampini 773*34a97f8cSStefano Zampini if (deluxe_ctx->par_ksp[i]) { 774*34a97f8cSStefano Zampini /* create random test vec being zero on internal nodes of the extende dirichlet problem */ 775*34a97f8cSStefano Zampini ierr = VecDuplicate(deluxe_ctx->par_vec[i],&test_vec);CHKERRQ(ierr); 776*34a97f8cSStefano Zampini ierr = VecSetRandom(sub_schurs->work1[subidx],PETSC_NULL);CHKERRQ(ierr); 777*34a97f8cSStefano Zampini ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 778*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 779*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 780*34a97f8cSStefano Zampini /* w_j */ 781*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],test_vec,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 782*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],test_vec,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 783*34a97f8cSStefano Zampini /* S_j*w_j */ 784*34a97f8cSStefano Zampini ierr = MatMult(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr); 785*34a97f8cSStefano Zampini /* \sum_j S_j*w_j */ 786*34a97f8cSStefano Zampini ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr); 787*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 788*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 789*34a97f8cSStefano Zampini /* (\sum_j S_j)^(-1)(\sum_j S_j*w_j) */ 790*34a97f8cSStefano Zampini ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr); 791*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 792*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 793*34a97f8cSStefano Zampini ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr); 794*34a97f8cSStefano Zampini ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 795*34a97f8cSStefano Zampini ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 796*34a97f8cSStefano Zampini /* test partition of unity */ 797*34a97f8cSStefano Zampini ierr = VecAXPY(test_vec,-1.0,deluxe_ctx->par_vec[i]);CHKERRQ(ierr); 798*34a97f8cSStefano Zampini ierr = VecNorm(test_vec,NORM_INFINITY,&error);CHKERRQ(ierr); 799*34a97f8cSStefano Zampini if (PetscAbsReal(error) > 1.e-8) { 800*34a97f8cSStefano Zampini /* ierr = VecView(test_vec,0);CHKERRQ(ierr); */ 801*34a97f8cSStefano Zampini error_found = PETSC_TRUE; 802*34a97f8cSStefano Zampini } 803*34a97f8cSStefano Zampini ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 804*34a97f8cSStefano Zampini } 805*34a97f8cSStefano Zampini if (error_found) { 806*34a97f8cSStefano Zampini ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Error testing local schur for color %d and subdomain %d\n",i,PetscGlobalRank);CHKERRQ(ierr); 807*34a97f8cSStefano Zampini } 808*34a97f8cSStefano Zampini ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 809*34a97f8cSStefano Zampini } 810*34a97f8cSStefano Zampini } 811*34a97f8cSStefano Zampini PetscFunctionReturn(0); 812*34a97f8cSStefano Zampini } 813*34a97f8cSStefano Zampini 814*34a97f8cSStefano Zampini 815*34a97f8cSStefano Zampini #undef __FUNCT__ 816*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Seq" 817*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC pc,PetscInt n_local_sequential_problems,PetscInt n_sequential_problems,PetscInt global_sequential[],PetscInt local_sequential[]) 818*34a97f8cSStefano Zampini { 819*34a97f8cSStefano Zampini PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 820*34a97f8cSStefano Zampini PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx; 821*34a97f8cSStefano Zampini PCBDDCSubSchurs sub_schurs = deluxe_ctx->sub_schurs; 822*34a97f8cSStefano Zampini Mat global_schur_subsets,*submat_global_schur_subsets,work_mat; 823*34a97f8cSStefano Zampini IS is_to,is_from; 824*34a97f8cSStefano Zampini PetscScalar *array,*fill_vals; 825*34a97f8cSStefano Zampini PetscInt *all_local_idx_G,*all_local_idx_B,*all_local_idx_N,*all_permutation_G,*dummy_idx; 826*34a97f8cSStefano Zampini PetscInt i,j,k,local_problem_index; 827*34a97f8cSStefano Zampini PetscInt subset_size,max_subset_size,max_subset_size_red; 828*34a97f8cSStefano Zampini PetscInt local_size,global_size; 829*34a97f8cSStefano Zampini char ksp_prefix[256]; 830*34a97f8cSStefano Zampini size_t len; 831*34a97f8cSStefano Zampini PetscErrorCode ierr; 832*34a97f8cSStefano Zampini 833*34a97f8cSStefano Zampini PetscFunctionBegin; 834*34a97f8cSStefano Zampini if (!n_sequential_problems) { 835*34a97f8cSStefano Zampini PetscFunctionReturn(0); 836*34a97f8cSStefano Zampini } 837*34a97f8cSStefano Zampini /* Get info on subset sizes and sum of all subsets sizes */ 838*34a97f8cSStefano Zampini max_subset_size = 0; 839*34a97f8cSStefano Zampini local_size = 0; 840*34a97f8cSStefano Zampini for (i=0;i<n_local_sequential_problems;i++) { 841*34a97f8cSStefano Zampini local_problem_index = local_sequential[i]; 842*34a97f8cSStefano Zampini ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 843*34a97f8cSStefano Zampini max_subset_size = PetscMax(subset_size,max_subset_size); 844*34a97f8cSStefano Zampini local_size += subset_size; 845*34a97f8cSStefano Zampini } 846*34a97f8cSStefano Zampini 847*34a97f8cSStefano Zampini /* Work arrays for local indices */ 848*34a97f8cSStefano Zampini ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr); 849*34a97f8cSStefano Zampini ierr = PetscMalloc1(local_size,&all_local_idx_N);CHKERRQ(ierr); 850*34a97f8cSStefano Zampini 851*34a97f8cSStefano Zampini /* Get local indices in local whole numbering and local boundary numbering */ 852*34a97f8cSStefano Zampini local_size = 0; 853*34a97f8cSStefano Zampini for (i=0;i<n_local_sequential_problems;i++) { 854*34a97f8cSStefano Zampini PetscInt *idxs; 855*34a97f8cSStefano Zampini /* get info on local problem */ 856*34a97f8cSStefano Zampini local_problem_index = local_sequential[i]; 857*34a97f8cSStefano Zampini ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 858*34a97f8cSStefano Zampini ierr = ISGetIndices(sub_schurs->is_AEj_B[local_problem_index],(const PetscInt**)&idxs);CHKERRQ(ierr); 859*34a97f8cSStefano Zampini /* subset indices in local numbering */ 860*34a97f8cSStefano Zampini ierr = PetscMemcpy(all_local_idx_N+local_size,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 861*34a97f8cSStefano Zampini /* subset indices in local boundary numbering */ 862*34a97f8cSStefano Zampini ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,subset_size,idxs,&j,&all_local_idx_B[local_size]);CHKERRQ(ierr); 863*34a97f8cSStefano Zampini ierr = ISRestoreIndices(sub_schurs->is_AEj_B[local_problem_index],(const PetscInt**)&idxs);CHKERRQ(ierr); 864*34a97f8cSStefano Zampini if (j != subset_size) { 865*34a97f8cSStefano Zampini SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in BDDC deluxe serial %d (BtoNmap)! %d != %d\n",local_problem_index,subset_size,j); 866*34a97f8cSStefano Zampini } 867*34a97f8cSStefano Zampini local_size += subset_size; 868*34a97f8cSStefano Zampini } 869*34a97f8cSStefano Zampini 870*34a97f8cSStefano Zampini /* Number dofs on all subsets (parallel) and sort numbering */ 871*34a97f8cSStefano Zampini ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),pcbddc->mat_graph->l2gmap,local_size,all_local_idx_N,PETSC_NULL,&global_size,&all_local_idx_G);CHKERRQ(ierr); 872*34a97f8cSStefano Zampini ierr = PetscMalloc1(local_size,&all_permutation_G);CHKERRQ(ierr); 873*34a97f8cSStefano Zampini for (i=0;i<local_size;i++) { 874*34a97f8cSStefano Zampini all_permutation_G[i]=i; 875*34a97f8cSStefano Zampini } 876*34a97f8cSStefano Zampini ierr = PetscSortIntWithPermutation(local_size,all_local_idx_G,all_permutation_G);CHKERRQ(ierr); 877*34a97f8cSStefano Zampini 878*34a97f8cSStefano Zampini /* Local matrix of all local Schur on subsets */ 879*34a97f8cSStefano Zampini ierr = MatCreate(PETSC_COMM_SELF,&deluxe_ctx->seq_mat);CHKERRQ(ierr); 880*34a97f8cSStefano Zampini ierr = MatSetSizes(deluxe_ctx->seq_mat,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr); 881*34a97f8cSStefano Zampini ierr = MatSetType(deluxe_ctx->seq_mat,MATAIJ);CHKERRQ(ierr); 882*34a97f8cSStefano Zampini ierr = MatSeqAIJSetPreallocation(deluxe_ctx->seq_mat,max_subset_size,PETSC_NULL);CHKERRQ(ierr); 883*34a97f8cSStefano Zampini 884*34a97f8cSStefano Zampini /* Global matrix of all assembled Schur on subsets */ 885*34a97f8cSStefano Zampini ierr = MatCreate(PetscObjectComm((PetscObject)pc),&global_schur_subsets);CHKERRQ(ierr); 886*34a97f8cSStefano Zampini ierr = MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);CHKERRQ(ierr); 887*34a97f8cSStefano Zampini ierr = MatSetType(global_schur_subsets,MATAIJ);CHKERRQ(ierr); 888*34a97f8cSStefano Zampini ierr = MPI_Allreduce(&max_subset_size,&max_subset_size_red,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 889*34a97f8cSStefano Zampini ierr = MatMPIAIJSetPreallocation(global_schur_subsets,max_subset_size_red,PETSC_NULL,max_subset_size_red,PETSC_NULL);CHKERRQ(ierr); 890*34a97f8cSStefano Zampini 891*34a97f8cSStefano Zampini /* Work arrays */ 892*34a97f8cSStefano Zampini ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&fill_vals);CHKERRQ(ierr); 893*34a97f8cSStefano Zampini 894*34a97f8cSStefano Zampini /* Loop on local problems to compute Schur complements explicitly */ 895*34a97f8cSStefano Zampini local_size = 0; 896*34a97f8cSStefano Zampini for (i=0;i<n_local_sequential_problems;i++) { 897*34a97f8cSStefano Zampini /* get info on local problem */ 898*34a97f8cSStefano Zampini local_problem_index = local_sequential[i]; 899*34a97f8cSStefano Zampini ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 900*34a97f8cSStefano Zampini /* local Schur */ 901*34a97f8cSStefano Zampini for (j=0;j<subset_size;j++) { 902*34a97f8cSStefano Zampini ierr = VecSet(sub_schurs->work1[local_problem_index],0.0);CHKERRQ(ierr); 903*34a97f8cSStefano Zampini ierr = VecSetValue(sub_schurs->work1[local_problem_index],j,1.0,INSERT_VALUES);CHKERRQ(ierr); 904*34a97f8cSStefano Zampini ierr = MatMult(sub_schurs->S_Ej[local_problem_index],sub_schurs->work1[local_problem_index],sub_schurs->work2[local_problem_index]);CHKERRQ(ierr); 905*34a97f8cSStefano Zampini /* store vals */ 906*34a97f8cSStefano Zampini ierr = VecGetArray(sub_schurs->work2[local_problem_index],&array);CHKERRQ(ierr); 907*34a97f8cSStefano Zampini for (k=0;k<subset_size;k++) { 908*34a97f8cSStefano Zampini fill_vals[k*subset_size+j] = array[k]; 909*34a97f8cSStefano Zampini } 910*34a97f8cSStefano Zampini ierr = VecRestoreArray(sub_schurs->work2[local_problem_index],&array);CHKERRQ(ierr); 911*34a97f8cSStefano Zampini } 912*34a97f8cSStefano Zampini for (j=0;j<subset_size;j++) { 913*34a97f8cSStefano Zampini dummy_idx[j]=local_size+j; 914*34a97f8cSStefano Zampini } 915*34a97f8cSStefano Zampini ierr = MatSetValues(deluxe_ctx->seq_mat,subset_size,dummy_idx,subset_size,dummy_idx,fill_vals,INSERT_VALUES);CHKERRQ(ierr); 916*34a97f8cSStefano Zampini ierr = MatSetValues(global_schur_subsets,subset_size,&all_local_idx_G[local_size],subset_size,&all_local_idx_G[local_size],fill_vals,ADD_VALUES);CHKERRQ(ierr); 917*34a97f8cSStefano Zampini local_size += subset_size; 918*34a97f8cSStefano Zampini } 919*34a97f8cSStefano Zampini ierr = MatAssemblyBegin(deluxe_ctx->seq_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 920*34a97f8cSStefano Zampini ierr = MatAssemblyEnd(deluxe_ctx->seq_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 921*34a97f8cSStefano Zampini ierr = MatAssemblyBegin(global_schur_subsets,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 922*34a97f8cSStefano Zampini ierr = MatAssemblyEnd(global_schur_subsets,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 923*34a97f8cSStefano Zampini ierr = PetscFree2(dummy_idx,fill_vals);CHKERRQ(ierr); 924*34a97f8cSStefano Zampini 925*34a97f8cSStefano Zampini /* Create work vectors for sequential part of deluxe */ 926*34a97f8cSStefano Zampini ierr = MatGetVecs(deluxe_ctx->seq_mat,&deluxe_ctx->seq_work1,&deluxe_ctx->seq_work2);CHKERRQ(ierr); 927*34a97f8cSStefano Zampini 928*34a97f8cSStefano Zampini /* Compute deluxe sequential scatter */ 929*34a97f8cSStefano Zampini ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&is_from);CHKERRQ(ierr); 930*34a97f8cSStefano Zampini ierr = VecScatterCreate(pcbddc->work_scaling,is_from,deluxe_ctx->seq_work1,NULL,&deluxe_ctx->seq_scctx);CHKERRQ(ierr); 931*34a97f8cSStefano Zampini ierr = ISDestroy(&is_from);CHKERRQ(ierr); 932*34a97f8cSStefano Zampini 933*34a97f8cSStefano Zampini /* Get local part of (\sum_j S_Ej) */ 934*34a97f8cSStefano Zampini for (i=0;i<local_size;i++) { 935*34a97f8cSStefano Zampini all_local_idx_N[i] = all_local_idx_G[all_permutation_G[i]]; 936*34a97f8cSStefano Zampini } 937*34a97f8cSStefano Zampini ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),local_size,all_local_idx_N,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 938*34a97f8cSStefano Zampini ierr = MatGetSubMatrices(global_schur_subsets,1,&is_to,&is_to,MAT_INITIAL_MATRIX,&submat_global_schur_subsets);CHKERRQ(ierr); 939*34a97f8cSStefano Zampini ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr); 940*34a97f8cSStefano Zampini ierr = ISDestroy(&is_to);CHKERRQ(ierr); 941*34a97f8cSStefano Zampini for (i=0;i<local_size;i++) { 942*34a97f8cSStefano Zampini all_local_idx_G[all_permutation_G[i]] = i; 943*34a97f8cSStefano Zampini } 944*34a97f8cSStefano Zampini ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_G,PETSC_OWN_POINTER,&is_from);CHKERRQ(ierr); 945*34a97f8cSStefano Zampini ierr = ISSetPermutation(is_from);CHKERRQ(ierr); 946*34a97f8cSStefano Zampini ierr = MatPermute(submat_global_schur_subsets[0],is_from,is_from,&work_mat);CHKERRQ(ierr); 947*34a97f8cSStefano Zampini ierr = MatDestroyMatrices(1,&submat_global_schur_subsets);CHKERRQ(ierr); 948*34a97f8cSStefano Zampini ierr = ISDestroy(&is_from);CHKERRQ(ierr); 949*34a97f8cSStefano Zampini ierr = PetscFree(all_permutation_G);CHKERRQ(ierr); 950*34a97f8cSStefano Zampini 951*34a97f8cSStefano Zampini /* Create KSP object for sequential part of deluxe scaling */ 952*34a97f8cSStefano Zampini ierr = KSPCreate(PETSC_COMM_SELF,&deluxe_ctx->seq_ksp);CHKERRQ(ierr); 953*34a97f8cSStefano Zampini ierr = KSPSetOperators(deluxe_ctx->seq_ksp,work_mat,work_mat);CHKERRQ(ierr); 954*34a97f8cSStefano Zampini ierr = KSPSetType(deluxe_ctx->seq_ksp,KSPPREONLY);CHKERRQ(ierr); 955*34a97f8cSStefano Zampini ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr); 956*34a97f8cSStefano Zampini len -= 10; /* remove "dirichlet_" */ 957*34a97f8cSStefano Zampini ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len);CHKERRQ(ierr); 958*34a97f8cSStefano Zampini *(ksp_prefix+len)='\0'; 959*34a97f8cSStefano Zampini ierr = PetscStrcat(ksp_prefix,"deluxe_seq_");CHKERRQ(ierr); 960*34a97f8cSStefano Zampini ierr = KSPSetOptionsPrefix(deluxe_ctx->seq_ksp,ksp_prefix);CHKERRQ(ierr); 961*34a97f8cSStefano Zampini ierr = KSPSetFromOptions(deluxe_ctx->seq_ksp);CHKERRQ(ierr); 962*34a97f8cSStefano Zampini ierr = KSPSetUp(deluxe_ctx->seq_ksp);CHKERRQ(ierr); 963*34a97f8cSStefano Zampini ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] PREFIX SEQ is %s\n",PetscGlobalRank,((PetscObject)deluxe_ctx->seq_ksp)->prefix);CHKERRQ(ierr); 964*34a97f8cSStefano Zampini ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 965*34a97f8cSStefano Zampini PetscFunctionReturn(0); 966*34a97f8cSStefano Zampini } 967