xref: /petsc/src/ksp/pc/impls/bddc/bddcscalingbasic.c (revision 34a97f8c781d28c390054b7fe2045ae8e3fc2469)
1ab5c6b0cSJed Brown #include <../src/ksp/pc/impls/bddc/bddc.h>
2ab5c6b0cSJed Brown #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3674ae819SStefano Zampini 
4*34a97f8cSStefano Zampini /* prototypes for deluxe functions */
5*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
6*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
7*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
8*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC,PetscInt,PetscInt,PetscInt[],PetscInt[]);
9*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC,PetscInt,PetscInt,PetscInt[],PetscInt[]);
10*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
11674ae819SStefano Zampini 
12674ae819SStefano Zampini #undef __FUNCT__
13674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingExtension_Basic"
14674ae819SStefano Zampini static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
15674ae819SStefano Zampini {
16674ae819SStefano Zampini   PC_IS* pcis = (PC_IS*)pc->data;
17674ae819SStefano Zampini   PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
18674ae819SStefano Zampini   PetscErrorCode ierr;
19674ae819SStefano Zampini 
20674ae819SStefano Zampini   PetscFunctionBegin;
21674ae819SStefano Zampini   /* Apply partition of unity */
22674ae819SStefano Zampini   ierr = VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);CHKERRQ(ierr);
23674ae819SStefano Zampini   ierr = VecSet(global_vector,0.0);CHKERRQ(ierr);
24674ae819SStefano Zampini   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
25674ae819SStefano Zampini   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
26674ae819SStefano Zampini   PetscFunctionReturn(0);
27674ae819SStefano Zampini }
28674ae819SStefano Zampini 
29674ae819SStefano Zampini #undef __FUNCT__
30674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingExtension_Deluxe"
31674ae819SStefano Zampini static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
32674ae819SStefano Zampini {
33674ae819SStefano Zampini   PC_IS*              pcis=(PC_IS*)pc->data;
34674ae819SStefano Zampini   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
35674ae819SStefano Zampini   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
36*34a97f8cSStefano Zampini   PCBDDCSubSchurs     sub_schurs = deluxe_ctx->sub_schurs;
37674ae819SStefano Zampini   PetscInt            i;
38674ae819SStefano Zampini   PetscErrorCode      ierr;
39674ae819SStefano Zampini 
40674ae819SStefano Zampini   /* TODO CHECK STUFF RELATED WITH FAKE WORK */
41674ae819SStefano Zampini   PetscFunctionBegin;
42*34a97f8cSStefano Zampini   ierr = VecSet(pcbddc->work_scaling,0.0);CHKERRQ(ierr); /* needed by the fake work below */
43*34a97f8cSStefano Zampini   if (deluxe_ctx->n_simple) {
44674ae819SStefano Zampini     /* scale deluxe vertices using diagonal scaling */
45*34a97f8cSStefano Zampini     PetscScalar *array_x,*array_D,*array;
46674ae819SStefano Zampini     ierr = VecGetArray(x,&array_x);CHKERRQ(ierr);
47674ae819SStefano Zampini     ierr = VecGetArray(pcis->D,&array_D);CHKERRQ(ierr);
48674ae819SStefano Zampini     ierr = VecGetArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
49674ae819SStefano Zampini     for (i=0;i<deluxe_ctx->n_simple;i++) {
50674ae819SStefano Zampini       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
51674ae819SStefano Zampini     }
52674ae819SStefano Zampini     ierr = VecRestoreArray(pcbddc->work_scaling,&array);CHKERRQ(ierr);
53674ae819SStefano Zampini     ierr = VecRestoreArray(pcis->D,&array_D);CHKERRQ(ierr);
54674ae819SStefano Zampini     ierr = VecRestoreArray(x,&array_x);CHKERRQ(ierr);
55*34a97f8cSStefano Zampini   }
56*34a97f8cSStefano Zampini   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */
57674ae819SStefano Zampini   if (deluxe_ctx->seq_mat) {
58674ae819SStefano Zampini     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
59674ae819SStefano Zampini     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,x,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
60674ae819SStefano Zampini     ierr = MatMult(deluxe_ctx->seq_mat,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr);
61674ae819SStefano Zampini     ierr = KSPSolve(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr);
62674ae819SStefano Zampini     /* fake work due to final ADD VALUES and vertices scaling needed? TODO: check it */
63674ae819SStefano Zampini     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
64674ae819SStefano Zampini     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
65674ae819SStefano Zampini   }
66674ae819SStefano Zampini   /* parallel part */
67674ae819SStefano Zampini   for (i=0;i<deluxe_ctx->par_colors;i++) {
68674ae819SStefano Zampini     if (deluxe_ctx->par_ksp[i]) {
69*34a97f8cSStefano Zampini       PetscMPIInt color_rank;
70*34a97f8cSStefano Zampini       PetscInt    subidx = deluxe_ctx->par_col2sub[i];
71*34a97f8cSStefano Zampini       /* restrict on subset */
72*34a97f8cSStefano Zampini       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],x,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
73*34a97f8cSStefano Zampini       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],x,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
74*34a97f8cSStefano Zampini       /* S_Ej */
75*34a97f8cSStefano Zampini       ierr = MatMult(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr);
76*34a97f8cSStefano Zampini       /* (\sum_j S_Ej)^-1 */
77*34a97f8cSStefano Zampini       ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
78*34a97f8cSStefano Zampini       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
79*34a97f8cSStefano Zampini       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
80674ae819SStefano Zampini       ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
81*34a97f8cSStefano Zampini       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)deluxe_ctx->par_ksp[i]),&color_rank);CHKERRQ(ierr);
82*34a97f8cSStefano Zampini       /* get back solution on subset */
83*34a97f8cSStefano Zampini       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
84*34a97f8cSStefano Zampini       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
85*34a97f8cSStefano Zampini       if (!color_rank) { /* only the master process in coloured comm copies the computed values */
86*34a97f8cSStefano Zampini         ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],sub_schurs->work1[subidx],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
87*34a97f8cSStefano Zampini         ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],sub_schurs->work1[subidx],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
88674ae819SStefano Zampini       }
89674ae819SStefano Zampini     }
90674ae819SStefano Zampini   }
91674ae819SStefano Zampini   /* put local boundary part in global vector */
92*34a97f8cSStefano Zampini   ierr = VecSet(y,0.0);CHKERRQ(ierr);
93674ae819SStefano Zampini   ierr = VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
94674ae819SStefano Zampini   ierr = VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
95674ae819SStefano Zampini   PetscFunctionReturn(0);
96674ae819SStefano Zampini }
97674ae819SStefano Zampini 
98674ae819SStefano Zampini #undef __FUNCT__
99674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingExtension"
100674ae819SStefano Zampini PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
101674ae819SStefano Zampini {
102674ae819SStefano Zampini   PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
103674ae819SStefano Zampini   PetscErrorCode ierr;
104674ae819SStefano Zampini 
105674ae819SStefano Zampini   PetscFunctionBegin;
106674ae819SStefano Zampini   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
107674ae819SStefano Zampini   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,2);
108674ae819SStefano Zampini   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,3);
109674ae819SStefano Zampini   if (local_interface_vector == pcbddc->work_scaling) {
110674ae819SStefano Zampini     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
111674ae819SStefano Zampini   }
112674ae819SStefano Zampini   ierr = PetscTryMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));CHKERRQ(ierr);
113674ae819SStefano Zampini   PetscFunctionReturn(0);
114674ae819SStefano Zampini }
115674ae819SStefano Zampini 
116674ae819SStefano Zampini #undef __FUNCT__
117674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingRestriction_Basic"
118674ae819SStefano Zampini static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
119674ae819SStefano Zampini {
120674ae819SStefano Zampini   PetscErrorCode ierr;
121674ae819SStefano Zampini   PC_IS* pcis = (PC_IS*)pc->data;
122674ae819SStefano Zampini 
123674ae819SStefano Zampini   PetscFunctionBegin;
124674ae819SStefano Zampini   ierr = VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
125674ae819SStefano Zampini   ierr = VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
126674ae819SStefano Zampini   /* Apply partition of unity */
127674ae819SStefano Zampini   ierr = VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);CHKERRQ(ierr);
128674ae819SStefano Zampini   PetscFunctionReturn(0);
129674ae819SStefano Zampini }
130674ae819SStefano Zampini 
131674ae819SStefano Zampini #undef __FUNCT__
132674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingRestriction_Deluxe"
133674ae819SStefano Zampini static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
134674ae819SStefano Zampini {
135674ae819SStefano Zampini   PC_IS*              pcis=(PC_IS*)pc->data;
136674ae819SStefano Zampini   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
137674ae819SStefano Zampini   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
138*34a97f8cSStefano Zampini   PCBDDCSubSchurs     sub_schurs = deluxe_ctx->sub_schurs;
139674ae819SStefano Zampini   PetscInt            i;
140674ae819SStefano Zampini   PetscErrorCode      ierr;
141674ae819SStefano Zampini 
142674ae819SStefano Zampini   PetscFunctionBegin;
143674ae819SStefano Zampini   /* get local boundary part of global vector */
144674ae819SStefano Zampini   ierr = VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
145674ae819SStefano Zampini   ierr = VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
146*34a97f8cSStefano Zampini   if (deluxe_ctx->n_simple) {
147*34a97f8cSStefano Zampini     /* scale deluxe vertices using diagonal scaling */
148*34a97f8cSStefano Zampini     PetscScalar *array_y,*array_D;
149674ae819SStefano Zampini     ierr = VecGetArray(y,&array_y);CHKERRQ(ierr);
150674ae819SStefano Zampini     ierr = VecGetArray(pcis->D,&array_D);CHKERRQ(ierr);
151674ae819SStefano Zampini     for (i=0;i<deluxe_ctx->n_simple;i++) {
152674ae819SStefano Zampini       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
153674ae819SStefano Zampini     }
154674ae819SStefano Zampini     ierr = VecRestoreArray(pcis->D,&array_D);CHKERRQ(ierr);
155674ae819SStefano Zampini     ierr = VecRestoreArray(y,&array_y);CHKERRQ(ierr);
156*34a97f8cSStefano Zampini   }
157*34a97f8cSStefano Zampini   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication and ksp solution */
158674ae819SStefano Zampini   if (deluxe_ctx->seq_mat) {
159674ae819SStefano Zampini     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
160674ae819SStefano Zampini     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,y,deluxe_ctx->seq_work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
161674ae819SStefano Zampini     ierr = KSPSolveTranspose(deluxe_ctx->seq_ksp,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2);CHKERRQ(ierr);
162674ae819SStefano Zampini     ierr = MatMultTranspose(deluxe_ctx->seq_mat,deluxe_ctx->seq_work2,deluxe_ctx->seq_work1);CHKERRQ(ierr);
163674ae819SStefano Zampini     ierr = VecScatterBegin(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
164674ae819SStefano Zampini     ierr = VecScatterEnd(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
165674ae819SStefano Zampini   }
166674ae819SStefano Zampini   /* parallel part */
167674ae819SStefano Zampini   for (i=0;i<deluxe_ctx->par_colors;i++) {
168674ae819SStefano Zampini     if (deluxe_ctx->par_ksp[i]) {
169*34a97f8cSStefano Zampini       PetscInt subidx = deluxe_ctx->par_col2sub[i];
170*34a97f8cSStefano Zampini       /* restrict on subset */
171*34a97f8cSStefano Zampini       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],y,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
172*34a97f8cSStefano Zampini       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],y,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
173*34a97f8cSStefano Zampini       /* (\sum_j S_Ej)^-T */
174*34a97f8cSStefano Zampini       ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
175*34a97f8cSStefano Zampini       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
176*34a97f8cSStefano Zampini       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
177674ae819SStefano Zampini       ierr = KSPSolveTranspose(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
178*34a97f8cSStefano Zampini       ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
179*34a97f8cSStefano Zampini       ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
180*34a97f8cSStefano Zampini       /* S_Ej^T */
181*34a97f8cSStefano Zampini       ierr = MatMultTranspose(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr);
182*34a97f8cSStefano Zampini       /* extend to boundary */
183*34a97f8cSStefano Zampini       ierr = VecScatterBegin(deluxe_ctx->par_scctx_s[i],sub_schurs->work2[subidx],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
184*34a97f8cSStefano Zampini       ierr = VecScatterEnd(deluxe_ctx->par_scctx_s[i],sub_schurs->work2[subidx],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
185674ae819SStefano Zampini     }
186674ae819SStefano Zampini   }
187674ae819SStefano Zampini   PetscFunctionReturn(0);
188674ae819SStefano Zampini }
189674ae819SStefano Zampini 
190674ae819SStefano Zampini #undef __FUNCT__
191674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingRestriction"
192674ae819SStefano Zampini PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
193674ae819SStefano Zampini {
194674ae819SStefano Zampini   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;
195674ae819SStefano Zampini   PetscErrorCode ierr;
196674ae819SStefano Zampini 
197674ae819SStefano Zampini   PetscFunctionBegin;
198674ae819SStefano Zampini   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
199674ae819SStefano Zampini   PetscValidHeaderSpecific(global_vector,VEC_CLASSID,2);
200674ae819SStefano Zampini   PetscValidHeaderSpecific(local_interface_vector,VEC_CLASSID,3);
201674ae819SStefano Zampini   if (local_interface_vector == pcbddc->work_scaling) {
202674ae819SStefano Zampini     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Local vector should cannot be pcbddc->work_scaling!\n");
203674ae819SStefano Zampini   }
204674ae819SStefano Zampini   ierr = PetscTryMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));CHKERRQ(ierr);
205674ae819SStefano Zampini   PetscFunctionReturn(0);
206674ae819SStefano Zampini }
207674ae819SStefano Zampini 
208674ae819SStefano Zampini #undef __FUNCT__
209674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingSetUp"
210674ae819SStefano Zampini PetscErrorCode PCBDDCScalingSetUp(PC pc)
211674ae819SStefano Zampini {
212674ae819SStefano Zampini   PC_IS* pcis=(PC_IS*)pc->data;
213674ae819SStefano Zampini   PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
214674ae819SStefano Zampini   PetscErrorCode ierr;
215674ae819SStefano Zampini 
216674ae819SStefano Zampini   PetscFunctionBegin;
217674ae819SStefano Zampini   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
218674ae819SStefano Zampini   /* create work vector for the operator */
219*34a97f8cSStefano Zampini   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
220674ae819SStefano Zampini   ierr = VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);CHKERRQ(ierr);
221*34a97f8cSStefano Zampini   /* always rebuild pcis->D */
22228d874f6SStefano Zampini   if (pcis->use_stiffness_scaling) {
223674ae819SStefano Zampini     ierr = MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);CHKERRQ(ierr);
224674ae819SStefano Zampini     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
225674ae819SStefano Zampini     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
226674ae819SStefano Zampini   }
227674ae819SStefano Zampini   ierr = VecCopy(pcis->D,pcis->vec1_B);CHKERRQ(ierr);
228674ae819SStefano Zampini   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
229674ae819SStefano Zampini   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
230674ae819SStefano Zampini   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
231674ae819SStefano Zampini   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
232674ae819SStefano Zampini   ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
233674ae819SStefano Zampini   ierr = VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);CHKERRQ(ierr);
234674ae819SStefano Zampini   /* now setup */
235681e7c04SStefano Zampini   if (pcbddc->use_deluxe_scaling) {
236*34a97f8cSStefano Zampini     if (!pcbddc->deluxe_ctx) {
237*34a97f8cSStefano Zampini       ierr = PCBDDCScalingCreate_Deluxe(pc);CHKERRQ(ierr);
238*34a97f8cSStefano Zampini     }
239*34a97f8cSStefano Zampini     ierr = PCBDDCScalingSetUp_Deluxe(pc);CHKERRQ(ierr);
240674ae819SStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);CHKERRQ(ierr);
241674ae819SStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);CHKERRQ(ierr);
242674ae819SStefano Zampini   } else {
243674ae819SStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);CHKERRQ(ierr);
244674ae819SStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);CHKERRQ(ierr);
245674ae819SStefano Zampini   }
246*34a97f8cSStefano Zampini 
247674ae819SStefano Zampini   /* test */
248674ae819SStefano Zampini   if (pcbddc->dbg_flag) {
249*34a97f8cSStefano Zampini     Vec         vec2_global;
250674ae819SStefano Zampini     PetscViewer viewer=pcbddc->dbg_viewer;
251*34a97f8cSStefano Zampini     PetscReal   error;
252674ae819SStefano Zampini 
253674ae819SStefano Zampini     /* extension -> from local to parallel */
254*34a97f8cSStefano Zampini     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
255*34a97f8cSStefano Zampini     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
256*34a97f8cSStefano Zampini     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
257*34a97f8cSStefano Zampini     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
258*34a97f8cSStefano Zampini     ierr = VecDuplicate(pcis->vec1_global,&vec2_global);CHKERRQ(ierr);
259*34a97f8cSStefano Zampini     ierr = VecCopy(pcis->vec1_global,vec2_global);CHKERRQ(ierr);
260*34a97f8cSStefano Zampini 
261674ae819SStefano Zampini     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
262674ae819SStefano Zampini     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
263674ae819SStefano Zampini     ierr = PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr);
264*34a97f8cSStefano Zampini     ierr = VecAXPY(pcis->vec1_global,-1.0,vec2_global);CHKERRQ(ierr);
265*34a97f8cSStefano Zampini     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
266674ae819SStefano Zampini     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);CHKERRQ(ierr);
267*34a97f8cSStefano Zampini     if (error>1.e-8 && pcbddc->dbg_flag>1) {
268674ae819SStefano Zampini       ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr);
269674ae819SStefano Zampini     }
270*34a97f8cSStefano Zampini     ierr = VecDestroy(&vec2_global);CHKERRQ(ierr);
271*34a97f8cSStefano Zampini 
272674ae819SStefano Zampini     /* restriction -> from parallel to local */
273674ae819SStefano Zampini     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
274*34a97f8cSStefano Zampini     ierr = VecSetRandom(pcis->vec1_B,NULL);CHKERRQ(ierr);
275674ae819SStefano Zampini     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
276674ae819SStefano Zampini     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
277*34a97f8cSStefano Zampini 
278*34a97f8cSStefano Zampini     ierr = PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);CHKERRQ(ierr);
279*34a97f8cSStefano Zampini     ierr = VecScale(pcis->vec1_B,-1.0);CHKERRQ(ierr);
280*34a97f8cSStefano Zampini     ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
281*34a97f8cSStefano Zampini     ierr = VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
282*34a97f8cSStefano Zampini     ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&error);CHKERRQ(ierr);
283*34a97f8cSStefano Zampini     ierr = PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);CHKERRQ(ierr);
284*34a97f8cSStefano Zampini     if (error>1.e-8 && pcbddc->dbg_flag>1) {
285674ae819SStefano Zampini       ierr = VecView(pcis->vec1_global,viewer);CHKERRQ(ierr);
286674ae819SStefano Zampini     }
287674ae819SStefano Zampini   }
288674ae819SStefano Zampini   PetscFunctionReturn(0);
289674ae819SStefano Zampini }
290674ae819SStefano Zampini 
291674ae819SStefano Zampini #undef __FUNCT__
292674ae819SStefano Zampini #define __FUNCT__ "PCBDDCScalingDestroy"
293674ae819SStefano Zampini PetscErrorCode PCBDDCScalingDestroy(PC pc)
294674ae819SStefano Zampini {
295674ae819SStefano Zampini   PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
296674ae819SStefano Zampini   PetscErrorCode ierr;
297674ae819SStefano Zampini 
298674ae819SStefano Zampini   PetscFunctionBegin;
299*34a97f8cSStefano Zampini   if (pcbddc->deluxe_ctx) {
300*34a97f8cSStefano Zampini     ierr = PCBDDCScalingDestroy_Deluxe(pc);CHKERRQ(ierr);
301674ae819SStefano Zampini   }
302674ae819SStefano Zampini   ierr = VecDestroy(&pcbddc->work_scaling);CHKERRQ(ierr);
303674ae819SStefano Zampini   /* remove functions */
304674ae819SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);CHKERRQ(ierr);
305674ae819SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);CHKERRQ(ierr);
306674ae819SStefano Zampini   PetscFunctionReturn(0);
307674ae819SStefano Zampini }
308674ae819SStefano Zampini 
309*34a97f8cSStefano Zampini #undef __FUNCT__
310*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingCreate_Deluxe"
311*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
312*34a97f8cSStefano Zampini {
313*34a97f8cSStefano Zampini   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
314*34a97f8cSStefano Zampini   PCBDDCDeluxeScaling deluxe_ctx;
315*34a97f8cSStefano Zampini   PetscErrorCode      ierr;
316*34a97f8cSStefano Zampini 
317*34a97f8cSStefano Zampini   PetscFunctionBegin;
318*34a97f8cSStefano Zampini   ierr = PetscNew(&deluxe_ctx);CHKERRQ(ierr);
319*34a97f8cSStefano Zampini   ierr = PCBDDCSubSchursCreate(&deluxe_ctx->sub_schurs);CHKERRQ(ierr);
320*34a97f8cSStefano Zampini   pcbddc->deluxe_ctx = deluxe_ctx;
321*34a97f8cSStefano Zampini   PetscFunctionReturn(0);
322*34a97f8cSStefano Zampini }
323*34a97f8cSStefano Zampini 
324*34a97f8cSStefano Zampini #undef __FUNCT__
325*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingDestroy_Deluxe"
326*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
327*34a97f8cSStefano Zampini {
328*34a97f8cSStefano Zampini   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
329*34a97f8cSStefano Zampini   PetscErrorCode      ierr;
330*34a97f8cSStefano Zampini 
331*34a97f8cSStefano Zampini   PetscFunctionBegin;
332*34a97f8cSStefano Zampini   ierr = PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);CHKERRQ(ierr);
333*34a97f8cSStefano Zampini   ierr = PCBDDCSubSchursDestroy(&(pcbddc->deluxe_ctx->sub_schurs));CHKERRQ(ierr);
334*34a97f8cSStefano Zampini   ierr = PetscFree(pcbddc->deluxe_ctx);CHKERRQ(ierr);
335*34a97f8cSStefano Zampini   PetscFunctionReturn(0);
336*34a97f8cSStefano Zampini }
337*34a97f8cSStefano Zampini 
338*34a97f8cSStefano Zampini #undef __FUNCT__
339*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingReset_Deluxe_Solvers"
340*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
341*34a97f8cSStefano Zampini {
342*34a97f8cSStefano Zampini   PetscErrorCode      ierr;
343*34a97f8cSStefano Zampini 
344*34a97f8cSStefano Zampini   PetscFunctionBegin;
345*34a97f8cSStefano Zampini   ierr = PetscFree(deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
346*34a97f8cSStefano Zampini   deluxe_ctx->n_simple = 0;
347*34a97f8cSStefano Zampini   if (deluxe_ctx->seq_mat) {
348*34a97f8cSStefano Zampini     ierr = VecScatterDestroy(&deluxe_ctx->seq_scctx);CHKERRQ(ierr);
349*34a97f8cSStefano Zampini     ierr = VecDestroy(&deluxe_ctx->seq_work1);CHKERRQ(ierr);
350*34a97f8cSStefano Zampini     ierr = VecDestroy(&deluxe_ctx->seq_work2);CHKERRQ(ierr);
351*34a97f8cSStefano Zampini     ierr = MatDestroy(&deluxe_ctx->seq_mat);CHKERRQ(ierr);
352*34a97f8cSStefano Zampini     ierr = KSPDestroy(&deluxe_ctx->seq_ksp);CHKERRQ(ierr);
353*34a97f8cSStefano Zampini   }
354*34a97f8cSStefano Zampini   if (deluxe_ctx->par_colors) {
355*34a97f8cSStefano Zampini     PetscInt i;
356*34a97f8cSStefano Zampini     for (i=0;i<deluxe_ctx->par_colors;i++) {
357*34a97f8cSStefano Zampini       ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr);
358*34a97f8cSStefano Zampini       ierr = VecScatterDestroy(&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr);
359*34a97f8cSStefano Zampini       ierr = VecDestroy(&deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
360*34a97f8cSStefano Zampini       ierr = KSPDestroy(&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
361*34a97f8cSStefano Zampini     }
362*34a97f8cSStefano Zampini     ierr = PetscFree5(deluxe_ctx->par_ksp,
363*34a97f8cSStefano Zampini                       deluxe_ctx->par_scctx_s,
364*34a97f8cSStefano Zampini                       deluxe_ctx->par_scctx_p,
365*34a97f8cSStefano Zampini                       deluxe_ctx->par_vec,
366*34a97f8cSStefano Zampini                       deluxe_ctx->par_col2sub);CHKERRQ(ierr);
367*34a97f8cSStefano Zampini   }
368*34a97f8cSStefano Zampini   deluxe_ctx->par_colors = 0;
369*34a97f8cSStefano Zampini   PetscFunctionReturn(0);
370*34a97f8cSStefano Zampini }
371*34a97f8cSStefano Zampini 
372*34a97f8cSStefano Zampini #undef __FUNCT__
373*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe"
374*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
375*34a97f8cSStefano Zampini {
376*34a97f8cSStefano Zampini   PC_IS                  *pcis=(PC_IS*)pc->data;
377*34a97f8cSStefano Zampini   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
378*34a97f8cSStefano Zampini   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
379*34a97f8cSStefano Zampini   PCBDDCSubSchurs        sub_schurs=deluxe_ctx->sub_schurs;
380*34a97f8cSStefano Zampini   PCBDDCGraph            graph;
381*34a97f8cSStefano Zampini   PetscInt               *index_sequential,*index_parallel;
382*34a97f8cSStefano Zampini   PetscInt               *auxlocal_sequential,*auxlocal_parallel;
383*34a97f8cSStefano Zampini   PetscInt               *auxglobal_sequential,*auxglobal_parallel;
384*34a97f8cSStefano Zampini   PetscInt               *auxmapping;
385*34a97f8cSStefano Zampini   PetscInt               i,j,min_loc;
386*34a97f8cSStefano Zampini   PetscInt               n_sequential_problems,n_local_sequential_problems,n_parallel_problems,n_local_parallel_problems;
387*34a97f8cSStefano Zampini   PetscErrorCode         ierr;
388*34a97f8cSStefano Zampini 
389*34a97f8cSStefano Zampini   PetscFunctionBegin;
390*34a97f8cSStefano Zampini   /* throw away the solvers */
391*34a97f8cSStefano Zampini   ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr);
392*34a97f8cSStefano Zampini 
393*34a97f8cSStefano Zampini   /* attach interface graph for determining subsets */
394*34a97f8cSStefano Zampini   if (pcbddc->deluxe_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
395*34a97f8cSStefano Zampini     PetscInt *idx_V_N;
396*34a97f8cSStefano Zampini     IS       verticesIS;
397*34a97f8cSStefano Zampini     ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&idx_V_N);CHKERRQ(ierr);
398*34a97f8cSStefano Zampini     ierr = ISCreateGeneral(PETSC_COMM_SELF,i,idx_V_N,PETSC_OWN_POINTER,&verticesIS);CHKERRQ(ierr);
399*34a97f8cSStefano Zampini     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
400*34a97f8cSStefano Zampini     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
401*34a97f8cSStefano Zampini     ierr = PCBDDCGraphSetUp(graph,0,PETSC_NULL,pcbddc->DirichletBoundaries,0,PETSC_NULL,verticesIS);CHKERRQ(ierr);
402*34a97f8cSStefano Zampini     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
403*34a97f8cSStefano Zampini     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
404*34a97f8cSStefano Zampini /*
405*34a97f8cSStefano Zampini     if (pcbddc->dbg_flag) {
406*34a97f8cSStefano Zampini       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
407*34a97f8cSStefano Zampini     }
408*34a97f8cSStefano Zampini */
409*34a97f8cSStefano Zampini   } else {
410*34a97f8cSStefano Zampini     graph = pcbddc->mat_graph;
411*34a97f8cSStefano Zampini   }
412*34a97f8cSStefano Zampini 
413*34a97f8cSStefano Zampini   /* map interface's subsets */
414*34a97f8cSStefano Zampini   j = 0;
415*34a97f8cSStefano Zampini   for (i=0;i<graph->ncc;i++) {
416*34a97f8cSStefano Zampini     j = PetscMax(j,graph->cptr[i+1]-graph->cptr[i]);
417*34a97f8cSStefano Zampini   }
418*34a97f8cSStefano Zampini   ierr = PetscMalloc5(j,&auxmapping,
419*34a97f8cSStefano Zampini                       graph->ncc,&auxlocal_sequential,
420*34a97f8cSStefano Zampini                       graph->ncc,&auxlocal_parallel,
421*34a97f8cSStefano Zampini                       graph->ncc,&index_sequential,
422*34a97f8cSStefano Zampini                       graph->ncc,&index_parallel);CHKERRQ(ierr);
423*34a97f8cSStefano Zampini 
424*34a97f8cSStefano Zampini   n_local_sequential_problems = 0;
425*34a97f8cSStefano Zampini   n_local_parallel_problems = 0;
426*34a97f8cSStefano Zampini   deluxe_ctx->n_simple = 0;
427*34a97f8cSStefano Zampini   for (i=0;i<graph->ncc;i++) {
428*34a97f8cSStefano Zampini     PetscInt subset_size = graph->cptr[i+1]-graph->cptr[i];
429*34a97f8cSStefano Zampini     if (subset_size > 1) {
430*34a97f8cSStefano Zampini       ierr = ISLocalToGlobalMappingApply(graph->l2gmap,subset_size,&graph->queue[graph->cptr[i]],auxmapping);CHKERRQ(ierr);
431*34a97f8cSStefano Zampini       min_loc = 0;
432*34a97f8cSStefano Zampini       for (j=1;j<subset_size;j++) {
433*34a97f8cSStefano Zampini         if (auxmapping[j]<auxmapping[min_loc]) {
434*34a97f8cSStefano Zampini           min_loc = j;
435*34a97f8cSStefano Zampini         }
436*34a97f8cSStefano Zampini       }
437*34a97f8cSStefano Zampini       if (subset_size > pcbddc->deluxe_threshold) {
438*34a97f8cSStefano Zampini         index_parallel[n_local_parallel_problems] = i;
439*34a97f8cSStefano Zampini         auxlocal_parallel[n_local_parallel_problems] = graph->queue[graph->cptr[i]+min_loc];
440*34a97f8cSStefano Zampini         n_local_parallel_problems++;
441*34a97f8cSStefano Zampini       } else {
442*34a97f8cSStefano Zampini         index_sequential[n_local_sequential_problems] = i;
443*34a97f8cSStefano Zampini         auxlocal_sequential[n_local_sequential_problems] = graph->queue[graph->cptr[i]+min_loc];
444*34a97f8cSStefano Zampini         n_local_sequential_problems++;
445*34a97f8cSStefano Zampini       }
446*34a97f8cSStefano Zampini     } else {
447*34a97f8cSStefano Zampini       deluxe_ctx->n_simple++;
448*34a97f8cSStefano Zampini     }
449*34a97f8cSStefano Zampini   }
450*34a97f8cSStefano Zampini 
451*34a97f8cSStefano Zampini   /* diagonal scaling on size 1 cc and on dirichlet boundary dofs */
452*34a97f8cSStefano Zampini   for (i=0;i<graph->nvtxs;i++) {
453*34a97f8cSStefano Zampini     if (graph->count[i] && graph->special_dof[i] == PCBDDCGRAPH_DIRICHLET_MARK) {
454*34a97f8cSStefano Zampini       deluxe_ctx->n_simple++;
455*34a97f8cSStefano Zampini     }
456*34a97f8cSStefano Zampini   }
457*34a97f8cSStefano Zampini   ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
458*34a97f8cSStefano Zampini   deluxe_ctx->n_simple = 0;
459*34a97f8cSStefano Zampini   for (i=0;i<graph->ncc;i++) {
460*34a97f8cSStefano Zampini     if (graph->cptr[i+1]-graph->cptr[i]==1) {
461*34a97f8cSStefano Zampini       deluxe_ctx->idx_simple_B[deluxe_ctx->n_simple++] = graph->queue[graph->cptr[i]];
462*34a97f8cSStefano Zampini     }
463*34a97f8cSStefano Zampini   }
464*34a97f8cSStefano Zampini   for (i=0;i<graph->nvtxs;i++) {
465*34a97f8cSStefano Zampini     if (graph->count[i] && graph->special_dof[i] == PCBDDCGRAPH_DIRICHLET_MARK) {
466*34a97f8cSStefano Zampini       deluxe_ctx->idx_simple_B[deluxe_ctx->n_simple++] = i;
467*34a97f8cSStefano Zampini     }
468*34a97f8cSStefano Zampini   }
469*34a97f8cSStefano Zampini   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B,&i,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
470*34a97f8cSStefano Zampini   if (i != deluxe_ctx->n_simple) {
471*34a97f8cSStefano Zampini     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simple scaling dofs! %d != %d",i,deluxe_ctx->n_simple);
472*34a97f8cSStefano Zampini   }
473*34a97f8cSStefano Zampini   ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
474*34a97f8cSStefano Zampini 
475*34a97f8cSStefano Zampini   /* SetUp local schur complements on subsets TODO better reuse procedure */
476*34a97f8cSStefano Zampini   if (!sub_schurs->n_subs) {
477*34a97f8cSStefano Zampini     Mat       S_j;
478*34a97f8cSStefano Zampini     PetscBool free_used_adj;
479*34a97f8cSStefano Zampini     PetscInt  *used_xadj,*used_adjncy;
480*34a97f8cSStefano Zampini 
481*34a97f8cSStefano Zampini     /* decide the adjacency to be used for determining internal problems for local schur on subsets */
482*34a97f8cSStefano Zampini     free_used_adj = PETSC_FALSE;
483*34a97f8cSStefano Zampini     if (pcbddc->deluxe_layers == -1) {
484*34a97f8cSStefano Zampini       used_xadj = NULL;
485*34a97f8cSStefano Zampini       used_adjncy = NULL;
486*34a97f8cSStefano Zampini     } else {
487*34a97f8cSStefano Zampini       if ((pcbddc->deluxe_use_useradj && pcbddc->mat_graph->xadj) || !pcbddc->deluxe_compute_rowadj) {
488*34a97f8cSStefano Zampini         used_xadj = pcbddc->mat_graph->xadj;
489*34a97f8cSStefano Zampini         used_adjncy = pcbddc->mat_graph->adjncy;
490*34a97f8cSStefano Zampini       } else {
491*34a97f8cSStefano Zampini         Mat            mat_adj;
492*34a97f8cSStefano Zampini         PetscBool      flg_row=PETSC_TRUE;
493*34a97f8cSStefano Zampini         const PetscInt *xadj,*adjncy;
494*34a97f8cSStefano Zampini         PetscInt       nvtxs;
495*34a97f8cSStefano Zampini 
496*34a97f8cSStefano Zampini         ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
497*34a97f8cSStefano Zampini         ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
498*34a97f8cSStefano Zampini         if (!flg_row) {
499*34a97f8cSStefano Zampini           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
500*34a97f8cSStefano Zampini         }
501*34a97f8cSStefano Zampini         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
502*34a97f8cSStefano Zampini         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
503*34a97f8cSStefano Zampini         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
504*34a97f8cSStefano Zampini         ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
505*34a97f8cSStefano Zampini         if (!flg_row) {
506*34a97f8cSStefano Zampini           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
507*34a97f8cSStefano Zampini         }
508*34a97f8cSStefano Zampini         ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
509*34a97f8cSStefano Zampini         free_used_adj = PETSC_TRUE;
510*34a97f8cSStefano Zampini       }
511*34a97f8cSStefano Zampini     }
512*34a97f8cSStefano Zampini 
513*34a97f8cSStefano Zampini     /* Create Schur complement matrix */
514*34a97f8cSStefano Zampini     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
515*34a97f8cSStefano Zampini     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
516*34a97f8cSStefano Zampini     /* setup Schur complements on subsets */
517*34a97f8cSStefano Zampini     ierr = PCBDDCSubSchursSetUp(sub_schurs,S_j,pcis->is_I_local,pcis->is_B_local,graph->ncc,graph->cptr,graph->queue,used_xadj,used_adjncy,PETSC_TRUE,pcbddc->deluxe_layers);CHKERRQ(ierr);
518*34a97f8cSStefano Zampini     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
519*34a97f8cSStefano Zampini     /* free adjacency */
520*34a97f8cSStefano Zampini     if (free_used_adj) {
521*34a97f8cSStefano Zampini       ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
522*34a97f8cSStefano Zampini     }
523*34a97f8cSStefano Zampini   }
524*34a97f8cSStefano Zampini 
525*34a97f8cSStefano Zampini   /* Number parallel problems */
526*34a97f8cSStefano Zampini   auxglobal_parallel = 0;
527*34a97f8cSStefano Zampini   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),graph->l2gmap,n_local_parallel_problems,auxlocal_parallel,PETSC_NULL,&n_parallel_problems,&auxglobal_parallel);CHKERRQ(ierr);
528*34a97f8cSStefano Zampini   if (pcbddc->dbg_flag) {
529*34a97f8cSStefano Zampini     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Deluxe global number of parallel subproblems: %d\n",n_parallel_problems);
530*34a97f8cSStefano Zampini   }
531*34a97f8cSStefano Zampini 
532*34a97f8cSStefano Zampini   /* Compute data structures to solve parallel problems */
533*34a97f8cSStefano Zampini   ierr = PCBDDCScalingSetUp_Deluxe_Par(pc,n_local_parallel_problems,n_parallel_problems,auxglobal_parallel,index_parallel);CHKERRQ(ierr);
534*34a97f8cSStefano Zampini   ierr = PetscFree(auxglobal_parallel);CHKERRQ(ierr);
535*34a97f8cSStefano Zampini 
536*34a97f8cSStefano Zampini 
537*34a97f8cSStefano Zampini   /* Number sequential problems */
538*34a97f8cSStefano Zampini   auxglobal_sequential = 0;
539*34a97f8cSStefano Zampini   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),graph->l2gmap,n_local_sequential_problems,auxlocal_sequential,PETSC_NULL,&n_sequential_problems,&auxglobal_sequential);CHKERRQ(ierr);
540*34a97f8cSStefano Zampini   if (pcbddc->dbg_flag) {
541*34a97f8cSStefano Zampini     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Deluxe global number of sequential subproblems: %d\n",n_sequential_problems);
542*34a97f8cSStefano Zampini   }
543*34a97f8cSStefano Zampini 
544*34a97f8cSStefano Zampini   /* Compute data structures to solve sequential problems */
545*34a97f8cSStefano Zampini   ierr = PCBDDCScalingSetUp_Deluxe_Seq(pc,n_local_sequential_problems,n_sequential_problems,auxglobal_sequential,index_sequential);CHKERRQ(ierr);
546*34a97f8cSStefano Zampini   ierr = PetscFree(auxglobal_sequential);CHKERRQ(ierr);
547*34a97f8cSStefano Zampini 
548*34a97f8cSStefano Zampini   /* free workspace */
549*34a97f8cSStefano Zampini   ierr = PetscFree5(auxmapping,auxlocal_sequential,auxlocal_parallel,index_sequential,index_parallel);CHKERRQ(ierr);
550*34a97f8cSStefano Zampini 
551*34a97f8cSStefano Zampini   /* free graph struct */
552*34a97f8cSStefano Zampini   if (pcbddc->deluxe_rebuild) {
553*34a97f8cSStefano Zampini     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
554*34a97f8cSStefano Zampini   }
555*34a97f8cSStefano Zampini   PetscFunctionReturn(0);
556*34a97f8cSStefano Zampini }
557*34a97f8cSStefano Zampini 
558*34a97f8cSStefano Zampini #undef __FUNCT__
559*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Par"
560*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Par(PC pc, PetscInt n_local_parallel_problems,PetscInt n_parallel_problems,PetscInt global_parallel[],PetscInt index_parallel[])
561*34a97f8cSStefano Zampini {
562*34a97f8cSStefano Zampini   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
563*34a97f8cSStefano Zampini   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
564*34a97f8cSStefano Zampini   /* coloring */
565*34a97f8cSStefano Zampini   Mat                    parallel_problems;
566*34a97f8cSStefano Zampini   MatColoring            coloring_obj;
567*34a97f8cSStefano Zampini   ISColoring             coloring_parallel_problems;
568*34a97f8cSStefano Zampini   IS                     *par_is_colors,*is_colors;
569*34a97f8cSStefano Zampini   /* working stuff */
570*34a97f8cSStefano Zampini   PetscInt               i,j;
571*34a97f8cSStefano Zampini   PetscErrorCode         ierr;
572*34a97f8cSStefano Zampini 
573*34a97f8cSStefano Zampini   PetscFunctionBegin;
574*34a97f8cSStefano Zampini   if (!n_parallel_problems) {
575*34a97f8cSStefano Zampini     PetscFunctionReturn(0);
576*34a97f8cSStefano Zampini   }
577*34a97f8cSStefano Zampini   /* Color parallel subproblems */
578*34a97f8cSStefano Zampini   ierr = MatCreate(PetscObjectComm((PetscObject)pc),&parallel_problems);CHKERRQ(ierr);
579*34a97f8cSStefano Zampini   ierr = MatSetSizes(parallel_problems,PETSC_DECIDE,PETSC_DECIDE,n_parallel_problems,n_parallel_problems);CHKERRQ(ierr);
580*34a97f8cSStefano Zampini   ierr = MatSetType(parallel_problems,MATAIJ);CHKERRQ(ierr);
581*34a97f8cSStefano Zampini   ierr = MatSetUp(parallel_problems);CHKERRQ(ierr);
582*34a97f8cSStefano Zampini   ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
583*34a97f8cSStefano Zampini   ierr = MatSetOption(parallel_problems,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
584*34a97f8cSStefano Zampini   for (i=0;i<n_local_parallel_problems;i++) {
585*34a97f8cSStefano Zampini     PetscInt row = global_parallel[i];
586*34a97f8cSStefano Zampini     for (j=0;j<n_local_parallel_problems;j++) {
587*34a97f8cSStefano Zampini       PetscInt col = global_parallel[j];
588*34a97f8cSStefano Zampini       if (row != col) {
589*34a97f8cSStefano Zampini         ierr = MatSetValue(parallel_problems,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
590*34a97f8cSStefano Zampini       }
591*34a97f8cSStefano Zampini     }
592*34a97f8cSStefano Zampini   }
593*34a97f8cSStefano Zampini   ierr = MatAssemblyBegin(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
594*34a97f8cSStefano Zampini   ierr = MatAssemblyEnd(parallel_problems,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
595*34a97f8cSStefano Zampini   if (pcbddc->dbg_flag > 1) {
596*34a97f8cSStefano Zampini     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
597*34a97f8cSStefano Zampini     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Adj matrix for deluxe parallel problems\n");CHKERRQ(ierr);
598*34a97f8cSStefano Zampini     ierr = MatView(parallel_problems,pcbddc->dbg_viewer);CHKERRQ(ierr);
599*34a97f8cSStefano Zampini   }
600*34a97f8cSStefano Zampini   ierr = MatColoringCreate(parallel_problems,&coloring_obj);CHKERRQ(ierr);
601*34a97f8cSStefano Zampini   ierr = MatColoringSetDistance(coloring_obj,1);CHKERRQ(ierr);
602*34a97f8cSStefano Zampini   ierr = MatColoringSetType(coloring_obj,MATCOLORINGJP);CHKERRQ(ierr);
603*34a97f8cSStefano Zampini   ierr = MatColoringApply(coloring_obj,&coloring_parallel_problems);CHKERRQ(ierr);
604*34a97f8cSStefano Zampini   ierr = ISColoringGetIS(coloring_parallel_problems,&deluxe_ctx->par_colors,&par_is_colors);CHKERRQ(ierr);
605*34a97f8cSStefano Zampini   if (pcbddc->dbg_flag) {
606*34a97f8cSStefano Zampini     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
607*34a97f8cSStefano Zampini     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Number of colors %d for parallel part of deluxe\n",deluxe_ctx->par_colors);CHKERRQ(ierr);
608*34a97f8cSStefano Zampini   }
609*34a97f8cSStefano Zampini 
610*34a97f8cSStefano Zampini   /* all procs should know the color distribution */
611*34a97f8cSStefano Zampini   ierr = PetscMalloc1(deluxe_ctx->par_colors,&is_colors);CHKERRQ(ierr);
612*34a97f8cSStefano Zampini   for (i=0;i<deluxe_ctx->par_colors;i++) {
613*34a97f8cSStefano Zampini     if (pcbddc->dbg_flag) {
614*34a97f8cSStefano Zampini       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Global problem indexes for color %d\n",i);CHKERRQ(ierr);
615*34a97f8cSStefano Zampini       ierr = ISView(par_is_colors[i],pcbddc->dbg_viewer);CHKERRQ(ierr);
616*34a97f8cSStefano Zampini       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
617*34a97f8cSStefano Zampini     }
618*34a97f8cSStefano Zampini     ierr = ISAllGather(par_is_colors[i],&is_colors[i]);CHKERRQ(ierr);
619*34a97f8cSStefano Zampini   }
620*34a97f8cSStefano Zampini 
621*34a97f8cSStefano Zampini   /* free unneeded objects */
622*34a97f8cSStefano Zampini   ierr = ISColoringRestoreIS(coloring_parallel_problems,&par_is_colors);CHKERRQ(ierr);
623*34a97f8cSStefano Zampini   ierr = ISColoringDestroy(&coloring_parallel_problems);CHKERRQ(ierr);
624*34a97f8cSStefano Zampini   ierr = MatColoringDestroy(&coloring_obj);CHKERRQ(ierr);
625*34a97f8cSStefano Zampini   ierr = MatDestroy(&parallel_problems);CHKERRQ(ierr);
626*34a97f8cSStefano Zampini 
627*34a97f8cSStefano Zampini   /* allocate deluxe arrays for parallel problems */
628*34a97f8cSStefano Zampini   ierr = PetscMalloc5(deluxe_ctx->par_colors,&deluxe_ctx->par_ksp,
629*34a97f8cSStefano Zampini                       deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_s,
630*34a97f8cSStefano Zampini                       deluxe_ctx->par_colors,&deluxe_ctx->par_scctx_p,
631*34a97f8cSStefano Zampini                       deluxe_ctx->par_colors,&deluxe_ctx->par_vec,
632*34a97f8cSStefano Zampini                       deluxe_ctx->par_colors,&deluxe_ctx->par_col2sub);CHKERRQ(ierr);
633*34a97f8cSStefano Zampini 
634*34a97f8cSStefano Zampini   /* cycle on colors */
635*34a97f8cSStefano Zampini   for (i=0;i<deluxe_ctx->par_colors;i++) {
636*34a97f8cSStefano Zampini     PetscSubcomm    par_subcomm;
637*34a97f8cSStefano Zampini     const PetscInt* idxs_subproblems;
638*34a97f8cSStefano Zampini     PetscInt        color_size;
639*34a97f8cSStefano Zampini     PetscMPIInt     rank,active_color;
640*34a97f8cSStefano Zampini 
641*34a97f8cSStefano Zampini     /* get local index of i-th parallel colored problem */
642*34a97f8cSStefano Zampini     ierr = ISGetLocalSize(is_colors[i],&color_size);CHKERRQ(ierr);
643*34a97f8cSStefano Zampini     ierr = ISGetIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr);
644*34a97f8cSStefano Zampini     /* split comm for computing parallel problems for this color */
645*34a97f8cSStefano Zampini     /* Processes not partecipating at this stage will have color = color_size */
646*34a97f8cSStefano Zampini     /* because PetscCommDuplicate does not handle MPI_COMM_NULL */
647*34a97f8cSStefano Zampini     active_color = color_size;
648*34a97f8cSStefano Zampini     deluxe_ctx->par_col2sub[i] = -1;
649*34a97f8cSStefano Zampini     for (j=0;j<n_local_parallel_problems;j++) {
650*34a97f8cSStefano Zampini       PetscInt local_idx;
651*34a97f8cSStefano Zampini       ierr = PetscFindInt(global_parallel[j],color_size,idxs_subproblems,&local_idx);CHKERRQ(ierr);
652*34a97f8cSStefano Zampini       if (local_idx > -1) {
653*34a97f8cSStefano Zampini         ierr = PetscMPIIntCast(local_idx,&active_color);CHKERRQ(ierr);
654*34a97f8cSStefano Zampini         deluxe_ctx->par_col2sub[i] = index_parallel[j];
655*34a97f8cSStefano Zampini         break;
656*34a97f8cSStefano Zampini       }
657*34a97f8cSStefano Zampini     }
658*34a97f8cSStefano Zampini     ierr = ISRestoreIndices(is_colors[i],&idxs_subproblems);CHKERRQ(ierr);
659*34a97f8cSStefano Zampini     ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)pc),&par_subcomm);CHKERRQ(ierr);
660*34a97f8cSStefano Zampini     ierr = PetscSubcommSetNumber(par_subcomm,color_size+1);CHKERRQ(ierr);
661*34a97f8cSStefano Zampini     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
662*34a97f8cSStefano Zampini     ierr = PetscSubcommSetTypeGeneral(par_subcomm,active_color,rank);CHKERRQ(ierr);
663*34a97f8cSStefano Zampini     /* print debug info */
664*34a97f8cSStefano Zampini     if (pcbddc->dbg_flag) {
665*34a97f8cSStefano Zampini       PetscMPIInt crank,csize;
666*34a97f8cSStefano Zampini       ierr = MPI_Comm_rank(par_subcomm->comm,&crank);CHKERRQ(ierr);
667*34a97f8cSStefano Zampini       ierr = MPI_Comm_size(par_subcomm->comm,&csize);CHKERRQ(ierr);
668*34a97f8cSStefano Zampini       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Color %d: size %d, details follows.\n",i,color_size);CHKERRQ(ierr);
669*34a97f8cSStefano Zampini       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
670*34a97f8cSStefano Zampini       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
671*34a97f8cSStefano Zampini       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"  Subdomain %d: color in subcomm %d (rank %d out of %d) (lidx %d)\n",PetscGlobalRank,par_subcomm->color,crank,csize,deluxe_ctx->par_col2sub[i]);CHKERRQ(ierr);
672*34a97f8cSStefano Zampini       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
673*34a97f8cSStefano Zampini     }
674*34a97f8cSStefano Zampini 
675*34a97f8cSStefano Zampini     if (deluxe_ctx->par_col2sub[i] >= 0) {
676*34a97f8cSStefano Zampini       PC                     pc;
677*34a97f8cSStefano Zampini       Mat                    color_mat,color_mat_is,temp_mat;
678*34a97f8cSStefano Zampini       ISLocalToGlobalMapping WtoNmap,l2gmap_subset;
679*34a97f8cSStefano Zampini       IS                     is_local_numbering,isB_local,isW_local,isW;
680*34a97f8cSStefano Zampini       PCBDDCSubSchurs        sub_schurs = deluxe_ctx->sub_schurs;
681*34a97f8cSStefano Zampini       PetscInt               subidx,n_local_dofs,n_global_dofs;
682*34a97f8cSStefano Zampini       PetscInt               *global_numbering,*local_numbering;
683*34a97f8cSStefano Zampini       char                   ksp_prefix[256];
684*34a97f8cSStefano Zampini       size_t                 len;
685*34a97f8cSStefano Zampini 
686*34a97f8cSStefano Zampini       /* Local index for schur complement on subset */
687*34a97f8cSStefano Zampini       subidx = deluxe_ctx->par_col2sub[i];
688*34a97f8cSStefano Zampini 
689*34a97f8cSStefano Zampini       /* Parallel numbering for dofs in colored subset */
690*34a97f8cSStefano Zampini       ierr = ISSum(sub_schurs->is_AEj_I[subidx],sub_schurs->is_AEj_B[subidx],&is_local_numbering);CHKERRQ(ierr);
691*34a97f8cSStefano Zampini       ierr = ISGetLocalSize(is_local_numbering,&n_local_dofs);CHKERRQ(ierr);
692*34a97f8cSStefano Zampini       ierr = ISGetIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr);
693*34a97f8cSStefano Zampini       ierr = PCBDDCSubsetNumbering(par_subcomm->comm,pcbddc->mat_graph->l2gmap,n_local_dofs,local_numbering,PETSC_NULL,&n_global_dofs,&global_numbering);CHKERRQ(ierr);
694*34a97f8cSStefano Zampini       ierr = ISRestoreIndices(is_local_numbering,(const PetscInt **)&local_numbering);CHKERRQ(ierr);
695*34a97f8cSStefano Zampini 
696*34a97f8cSStefano Zampini       /* L2Gmap from relevant dofs to local dofs */
697*34a97f8cSStefano Zampini       ierr = ISLocalToGlobalMappingCreateIS(is_local_numbering,&WtoNmap);CHKERRQ(ierr);
698*34a97f8cSStefano Zampini 
699*34a97f8cSStefano Zampini       /* L2Gmap from local to global dofs */
700*34a97f8cSStefano Zampini       ierr = ISLocalToGlobalMappingCreate(par_subcomm->comm,1,n_local_dofs,global_numbering,PETSC_COPY_VALUES,&l2gmap_subset);CHKERRQ(ierr);
701*34a97f8cSStefano Zampini 
702*34a97f8cSStefano Zampini       /* compute parallel matrix (extended dirichlet problem on subset) */
703*34a97f8cSStefano Zampini       ierr = MatCreateIS(par_subcomm->comm,1,PETSC_DECIDE,PETSC_DECIDE,n_global_dofs,n_global_dofs,l2gmap_subset,&color_mat_is);CHKERRQ(ierr);
704*34a97f8cSStefano Zampini       ierr = MatGetSubMatrix(pcbddc->local_mat,is_local_numbering,is_local_numbering,MAT_INITIAL_MATRIX,&temp_mat);CHKERRQ(ierr);
705*34a97f8cSStefano Zampini       ierr = MatISSetLocalMat(color_mat_is,temp_mat);CHKERRQ(ierr);
706*34a97f8cSStefano Zampini       ierr = MatDestroy(&temp_mat);CHKERRQ(ierr);
707*34a97f8cSStefano Zampini       ierr = MatISGetMPIXAIJ(color_mat_is,MAT_INITIAL_MATRIX,&color_mat);CHKERRQ(ierr);
708*34a97f8cSStefano Zampini       ierr = MatDestroy(&color_mat_is);CHKERRQ(ierr);
709*34a97f8cSStefano Zampini 
710*34a97f8cSStefano Zampini       /* work vector for (parallel) extended dirichlet problem */
711*34a97f8cSStefano Zampini       ierr = MatGetVecs(color_mat,&deluxe_ctx->par_vec[i],NULL);CHKERRQ(ierr);
712*34a97f8cSStefano Zampini 
713*34a97f8cSStefano Zampini       /* compute scatters */
714*34a97f8cSStefano Zampini       /* deluxe_ctx->par_scctx_p[i] extension from local subset to extended dirichlet problem
715*34a97f8cSStefano Zampini          deluxe_ctx->par_scctx_s[i] restriction from local boundary to subset -> simple copy of selected values */
716*34a97f8cSStefano Zampini       ierr = ISGlobalToLocalMappingApplyIS(pcbddc->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[subidx],&isB_local);CHKERRQ(ierr);
717*34a97f8cSStefano Zampini       ierr = VecScatterCreate(pcbddc->work_scaling,isB_local,sub_schurs->work1[subidx],NULL,&deluxe_ctx->par_scctx_s[i]);CHKERRQ(ierr);
718*34a97f8cSStefano Zampini       ierr = ISGlobalToLocalMappingApplyIS(WtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[subidx],&isW_local);CHKERRQ(ierr);
719*34a97f8cSStefano Zampini       ierr = ISLocalToGlobalMappingApplyIS(l2gmap_subset,isW_local,&isW);CHKERRQ(ierr);
720*34a97f8cSStefano Zampini       ierr = VecScatterCreate(sub_schurs->work1[subidx],NULL,deluxe_ctx->par_vec[i],isW,&deluxe_ctx->par_scctx_p[i]);CHKERRQ(ierr);
721*34a97f8cSStefano Zampini 
722*34a97f8cSStefano Zampini       /* free objects no longer neeeded */
723*34a97f8cSStefano Zampini       ierr = ISDestroy(&isW);CHKERRQ(ierr);
724*34a97f8cSStefano Zampini       ierr = ISDestroy(&isW_local);CHKERRQ(ierr);
725*34a97f8cSStefano Zampini       ierr = ISDestroy(&isB_local);CHKERRQ(ierr);
726*34a97f8cSStefano Zampini       ierr = ISLocalToGlobalMappingDestroy(&WtoNmap);CHKERRQ(ierr);
727*34a97f8cSStefano Zampini       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subset);CHKERRQ(ierr);
728*34a97f8cSStefano Zampini       ierr = ISDestroy(&is_local_numbering);CHKERRQ(ierr);
729*34a97f8cSStefano Zampini       ierr = PetscFree(global_numbering);CHKERRQ(ierr);
730*34a97f8cSStefano Zampini 
731*34a97f8cSStefano Zampini       /* KSP for extended dirichlet problem */
732*34a97f8cSStefano Zampini       ierr = KSPCreate(par_subcomm->comm,&deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
733*34a97f8cSStefano Zampini       ierr = KSPSetOperators(deluxe_ctx->par_ksp[i],color_mat,color_mat);CHKERRQ(ierr);
734*34a97f8cSStefano Zampini       ierr = KSPSetTolerances(deluxe_ctx->par_ksp[i],1.e-12,1.e-12,1.e10,10000);CHKERRQ(ierr);
735*34a97f8cSStefano Zampini       ierr = KSPSetType(deluxe_ctx->par_ksp[i],KSPPREONLY);CHKERRQ(ierr);
736*34a97f8cSStefano Zampini       ierr = KSPGetPC(deluxe_ctx->par_ksp[i],&pc);CHKERRQ(ierr);
737*34a97f8cSStefano Zampini       ierr = PCSetType(pc,PCREDUNDANT);CHKERRQ(ierr);
738*34a97f8cSStefano Zampini       ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr);
739*34a97f8cSStefano Zampini       //ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] PREFIX DIR %d is \"%s\"\n",PetscGlobalRank,len,((PetscObject)(pcbddc->ksp_D))->prefix);CHKERRQ(ierr);
740*34a97f8cSStefano Zampini       len -= 10; /* remove "dirichlet_" TODO CHECK WITH VALGRIND*/
741*34a97f8cSStefano Zampini       ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len+1);CHKERRQ(ierr);
742*34a97f8cSStefano Zampini       //ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len);CHKERRQ(ierr);
743*34a97f8cSStefano Zampini       //ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] PREFIX TMP %d is \"%s\"\n",PetscGlobalRank,len,ksp_prefix);CHKERRQ(ierr);
744*34a97f8cSStefano Zampini       ierr = PetscStrcat(ksp_prefix,"deluxe_par_");CHKERRQ(ierr);
745*34a97f8cSStefano Zampini       ierr = KSPSetOptionsPrefix(deluxe_ctx->par_ksp[i],ksp_prefix);CHKERRQ(ierr);
746*34a97f8cSStefano Zampini       ierr = KSPSetFromOptions(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
747*34a97f8cSStefano Zampini       ierr = KSPSetUp(deluxe_ctx->par_ksp[i]);CHKERRQ(ierr);
748*34a97f8cSStefano Zampini       ierr = MatDestroy(&color_mat);CHKERRQ(ierr);
749*34a97f8cSStefano Zampini       //ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] PREFIX PAR %d is \"%s\"\n",PetscGlobalRank,len,((PetscObject)deluxe_ctx->par_ksp[i])->prefix);CHKERRQ(ierr);
750*34a97f8cSStefano Zampini     } else { /* not partecipating in color */
751*34a97f8cSStefano Zampini       deluxe_ctx->par_ksp[i] = 0;
752*34a97f8cSStefano Zampini       deluxe_ctx->par_vec[i] = 0;
753*34a97f8cSStefano Zampini       deluxe_ctx->par_scctx_p[i] = 0;
754*34a97f8cSStefano Zampini       deluxe_ctx->par_scctx_s[i] = 0;
755*34a97f8cSStefano Zampini     }
756*34a97f8cSStefano Zampini     ierr = PetscSubcommDestroy(&par_subcomm);CHKERRQ(ierr);
757*34a97f8cSStefano Zampini   }
758*34a97f8cSStefano Zampini   for (i=0;i<deluxe_ctx->par_colors;i++) {
759*34a97f8cSStefano Zampini     ierr = ISDestroy(&is_colors[i]);CHKERRQ(ierr);
760*34a97f8cSStefano Zampini   }
761*34a97f8cSStefano Zampini   ierr = PetscFree(is_colors);CHKERRQ(ierr);
762*34a97f8cSStefano Zampini 
763*34a97f8cSStefano Zampini   if (pcbddc->dbg_flag) {
764*34a97f8cSStefano Zampini     Vec test_vec;
765*34a97f8cSStefano Zampini     PetscReal error;
766*34a97f8cSStefano Zampini     PCBDDCSubSchurs sub_schurs = deluxe_ctx->sub_schurs;
767*34a97f8cSStefano Zampini     /* test partition of unity of coloured schur complements  */
768*34a97f8cSStefano Zampini     for (i=0;i<deluxe_ctx->par_colors;i++) {
769*34a97f8cSStefano Zampini       PetscInt  subidx = deluxe_ctx->par_col2sub[i];
770*34a97f8cSStefano Zampini       PetscBool error_found = PETSC_FALSE;
771*34a97f8cSStefano Zampini       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
772*34a97f8cSStefano Zampini 
773*34a97f8cSStefano Zampini       if (deluxe_ctx->par_ksp[i]) {
774*34a97f8cSStefano Zampini         /* create random test vec being zero on internal nodes of the extende dirichlet problem */
775*34a97f8cSStefano Zampini         ierr = VecDuplicate(deluxe_ctx->par_vec[i],&test_vec);CHKERRQ(ierr);
776*34a97f8cSStefano Zampini         ierr = VecSetRandom(sub_schurs->work1[subidx],PETSC_NULL);CHKERRQ(ierr);
777*34a97f8cSStefano Zampini         ierr = VecSet(test_vec,0.0);CHKERRQ(ierr);
778*34a97f8cSStefano Zampini         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
779*34a97f8cSStefano Zampini         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
780*34a97f8cSStefano Zampini         /* w_j */
781*34a97f8cSStefano Zampini         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],test_vec,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
782*34a97f8cSStefano Zampini         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],test_vec,sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783*34a97f8cSStefano Zampini         /* S_j*w_j */
784*34a97f8cSStefano Zampini         ierr = MatMult(sub_schurs->S_Ej[subidx],sub_schurs->work1[subidx],sub_schurs->work2[subidx]);CHKERRQ(ierr);
785*34a97f8cSStefano Zampini         /* \sum_j S_j*w_j */
786*34a97f8cSStefano Zampini         ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
787*34a97f8cSStefano Zampini         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
788*34a97f8cSStefano Zampini         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work2[subidx],deluxe_ctx->par_vec[i],ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
789*34a97f8cSStefano Zampini         /* (\sum_j S_j)^(-1)(\sum_j S_j*w_j) */
790*34a97f8cSStefano Zampini         ierr = KSPSolve(deluxe_ctx->par_ksp[i],deluxe_ctx->par_vec[i],deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
791*34a97f8cSStefano Zampini         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
792*34a97f8cSStefano Zampini         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],deluxe_ctx->par_vec[i],sub_schurs->work1[subidx],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
793*34a97f8cSStefano Zampini         ierr = VecSet(deluxe_ctx->par_vec[i],0.0);CHKERRQ(ierr);
794*34a97f8cSStefano Zampini         ierr = VecScatterBegin(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
795*34a97f8cSStefano Zampini         ierr = VecScatterEnd(deluxe_ctx->par_scctx_p[i],sub_schurs->work1[subidx],deluxe_ctx->par_vec[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
796*34a97f8cSStefano Zampini         /* test partition of unity */
797*34a97f8cSStefano Zampini         ierr = VecAXPY(test_vec,-1.0,deluxe_ctx->par_vec[i]);CHKERRQ(ierr);
798*34a97f8cSStefano Zampini         ierr = VecNorm(test_vec,NORM_INFINITY,&error);CHKERRQ(ierr);
799*34a97f8cSStefano Zampini         if (PetscAbsReal(error) > 1.e-8) {
800*34a97f8cSStefano Zampini           /* ierr = VecView(test_vec,0);CHKERRQ(ierr); */
801*34a97f8cSStefano Zampini           error_found = PETSC_TRUE;
802*34a97f8cSStefano Zampini         }
803*34a97f8cSStefano Zampini         ierr = VecDestroy(&test_vec);CHKERRQ(ierr);
804*34a97f8cSStefano Zampini       }
805*34a97f8cSStefano Zampini       if (error_found) {
806*34a97f8cSStefano Zampini         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Error testing local schur for color %d and subdomain %d\n",i,PetscGlobalRank);CHKERRQ(ierr);
807*34a97f8cSStefano Zampini       }
808*34a97f8cSStefano Zampini       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
809*34a97f8cSStefano Zampini     }
810*34a97f8cSStefano Zampini   }
811*34a97f8cSStefano Zampini   PetscFunctionReturn(0);
812*34a97f8cSStefano Zampini }
813*34a97f8cSStefano Zampini 
814*34a97f8cSStefano Zampini 
815*34a97f8cSStefano Zampini #undef __FUNCT__
816*34a97f8cSStefano Zampini #define __FUNCT__ "PCBDDCScalingSetUp_Deluxe_Seq"
817*34a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Seq(PC pc,PetscInt n_local_sequential_problems,PetscInt n_sequential_problems,PetscInt global_sequential[],PetscInt local_sequential[])
818*34a97f8cSStefano Zampini {
819*34a97f8cSStefano Zampini   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
820*34a97f8cSStefano Zampini   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
821*34a97f8cSStefano Zampini   PCBDDCSubSchurs     sub_schurs = deluxe_ctx->sub_schurs;
822*34a97f8cSStefano Zampini   Mat                 global_schur_subsets,*submat_global_schur_subsets,work_mat;
823*34a97f8cSStefano Zampini   IS                  is_to,is_from;
824*34a97f8cSStefano Zampini   PetscScalar         *array,*fill_vals;
825*34a97f8cSStefano Zampini   PetscInt            *all_local_idx_G,*all_local_idx_B,*all_local_idx_N,*all_permutation_G,*dummy_idx;
826*34a97f8cSStefano Zampini   PetscInt            i,j,k,local_problem_index;
827*34a97f8cSStefano Zampini   PetscInt            subset_size,max_subset_size,max_subset_size_red;
828*34a97f8cSStefano Zampini   PetscInt            local_size,global_size;
829*34a97f8cSStefano Zampini   char                ksp_prefix[256];
830*34a97f8cSStefano Zampini   size_t              len;
831*34a97f8cSStefano Zampini   PetscErrorCode      ierr;
832*34a97f8cSStefano Zampini 
833*34a97f8cSStefano Zampini   PetscFunctionBegin;
834*34a97f8cSStefano Zampini   if (!n_sequential_problems) {
835*34a97f8cSStefano Zampini     PetscFunctionReturn(0);
836*34a97f8cSStefano Zampini   }
837*34a97f8cSStefano Zampini   /* Get info on subset sizes and sum of all subsets sizes */
838*34a97f8cSStefano Zampini   max_subset_size = 0;
839*34a97f8cSStefano Zampini   local_size = 0;
840*34a97f8cSStefano Zampini   for (i=0;i<n_local_sequential_problems;i++) {
841*34a97f8cSStefano Zampini     local_problem_index = local_sequential[i];
842*34a97f8cSStefano Zampini     ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr);
843*34a97f8cSStefano Zampini     max_subset_size = PetscMax(subset_size,max_subset_size);
844*34a97f8cSStefano Zampini     local_size += subset_size;
845*34a97f8cSStefano Zampini   }
846*34a97f8cSStefano Zampini 
847*34a97f8cSStefano Zampini   /* Work arrays for local indices */
848*34a97f8cSStefano Zampini   ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr);
849*34a97f8cSStefano Zampini   ierr = PetscMalloc1(local_size,&all_local_idx_N);CHKERRQ(ierr);
850*34a97f8cSStefano Zampini 
851*34a97f8cSStefano Zampini   /* Get local indices in local whole numbering and local boundary numbering */
852*34a97f8cSStefano Zampini   local_size = 0;
853*34a97f8cSStefano Zampini   for (i=0;i<n_local_sequential_problems;i++) {
854*34a97f8cSStefano Zampini     PetscInt *idxs;
855*34a97f8cSStefano Zampini     /* get info on local problem */
856*34a97f8cSStefano Zampini     local_problem_index = local_sequential[i];
857*34a97f8cSStefano Zampini     ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr);
858*34a97f8cSStefano Zampini     ierr = ISGetIndices(sub_schurs->is_AEj_B[local_problem_index],(const PetscInt**)&idxs);CHKERRQ(ierr);
859*34a97f8cSStefano Zampini     /* subset indices in local numbering */
860*34a97f8cSStefano Zampini     ierr = PetscMemcpy(all_local_idx_N+local_size,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
861*34a97f8cSStefano Zampini     /* subset indices in local boundary numbering */
862*34a97f8cSStefano Zampini     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,subset_size,idxs,&j,&all_local_idx_B[local_size]);CHKERRQ(ierr);
863*34a97f8cSStefano Zampini     ierr = ISRestoreIndices(sub_schurs->is_AEj_B[local_problem_index],(const PetscInt**)&idxs);CHKERRQ(ierr);
864*34a97f8cSStefano Zampini     if (j != subset_size) {
865*34a97f8cSStefano Zampini       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in BDDC deluxe serial %d (BtoNmap)! %d != %d\n",local_problem_index,subset_size,j);
866*34a97f8cSStefano Zampini     }
867*34a97f8cSStefano Zampini     local_size += subset_size;
868*34a97f8cSStefano Zampini   }
869*34a97f8cSStefano Zampini 
870*34a97f8cSStefano Zampini   /* Number dofs on all subsets (parallel) and sort numbering */
871*34a97f8cSStefano Zampini   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)pc),pcbddc->mat_graph->l2gmap,local_size,all_local_idx_N,PETSC_NULL,&global_size,&all_local_idx_G);CHKERRQ(ierr);
872*34a97f8cSStefano Zampini   ierr = PetscMalloc1(local_size,&all_permutation_G);CHKERRQ(ierr);
873*34a97f8cSStefano Zampini   for (i=0;i<local_size;i++) {
874*34a97f8cSStefano Zampini     all_permutation_G[i]=i;
875*34a97f8cSStefano Zampini   }
876*34a97f8cSStefano Zampini   ierr = PetscSortIntWithPermutation(local_size,all_local_idx_G,all_permutation_G);CHKERRQ(ierr);
877*34a97f8cSStefano Zampini 
878*34a97f8cSStefano Zampini   /* Local matrix of all local Schur on subsets */
879*34a97f8cSStefano Zampini   ierr = MatCreate(PETSC_COMM_SELF,&deluxe_ctx->seq_mat);CHKERRQ(ierr);
880*34a97f8cSStefano Zampini   ierr = MatSetSizes(deluxe_ctx->seq_mat,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
881*34a97f8cSStefano Zampini   ierr = MatSetType(deluxe_ctx->seq_mat,MATAIJ);CHKERRQ(ierr);
882*34a97f8cSStefano Zampini   ierr = MatSeqAIJSetPreallocation(deluxe_ctx->seq_mat,max_subset_size,PETSC_NULL);CHKERRQ(ierr);
883*34a97f8cSStefano Zampini 
884*34a97f8cSStefano Zampini   /* Global matrix of all assembled Schur on subsets */
885*34a97f8cSStefano Zampini   ierr = MatCreate(PetscObjectComm((PetscObject)pc),&global_schur_subsets);CHKERRQ(ierr);
886*34a97f8cSStefano Zampini   ierr = MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);CHKERRQ(ierr);
887*34a97f8cSStefano Zampini   ierr = MatSetType(global_schur_subsets,MATAIJ);CHKERRQ(ierr);
888*34a97f8cSStefano Zampini   ierr = MPI_Allreduce(&max_subset_size,&max_subset_size_red,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
889*34a97f8cSStefano Zampini   ierr = MatMPIAIJSetPreallocation(global_schur_subsets,max_subset_size_red,PETSC_NULL,max_subset_size_red,PETSC_NULL);CHKERRQ(ierr);
890*34a97f8cSStefano Zampini 
891*34a97f8cSStefano Zampini   /* Work arrays */
892*34a97f8cSStefano Zampini   ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&fill_vals);CHKERRQ(ierr);
893*34a97f8cSStefano Zampini 
894*34a97f8cSStefano Zampini   /* Loop on local problems to compute Schur complements explicitly */
895*34a97f8cSStefano Zampini   local_size = 0;
896*34a97f8cSStefano Zampini   for (i=0;i<n_local_sequential_problems;i++) {
897*34a97f8cSStefano Zampini     /* get info on local problem */
898*34a97f8cSStefano Zampini     local_problem_index = local_sequential[i];
899*34a97f8cSStefano Zampini     ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr);
900*34a97f8cSStefano Zampini     /* local Schur */
901*34a97f8cSStefano Zampini     for (j=0;j<subset_size;j++) {
902*34a97f8cSStefano Zampini       ierr = VecSet(sub_schurs->work1[local_problem_index],0.0);CHKERRQ(ierr);
903*34a97f8cSStefano Zampini       ierr = VecSetValue(sub_schurs->work1[local_problem_index],j,1.0,INSERT_VALUES);CHKERRQ(ierr);
904*34a97f8cSStefano Zampini       ierr = MatMult(sub_schurs->S_Ej[local_problem_index],sub_schurs->work1[local_problem_index],sub_schurs->work2[local_problem_index]);CHKERRQ(ierr);
905*34a97f8cSStefano Zampini       /* store vals */
906*34a97f8cSStefano Zampini       ierr = VecGetArray(sub_schurs->work2[local_problem_index],&array);CHKERRQ(ierr);
907*34a97f8cSStefano Zampini       for (k=0;k<subset_size;k++) {
908*34a97f8cSStefano Zampini         fill_vals[k*subset_size+j] = array[k];
909*34a97f8cSStefano Zampini       }
910*34a97f8cSStefano Zampini       ierr = VecRestoreArray(sub_schurs->work2[local_problem_index],&array);CHKERRQ(ierr);
911*34a97f8cSStefano Zampini     }
912*34a97f8cSStefano Zampini     for (j=0;j<subset_size;j++) {
913*34a97f8cSStefano Zampini       dummy_idx[j]=local_size+j;
914*34a97f8cSStefano Zampini     }
915*34a97f8cSStefano Zampini     ierr = MatSetValues(deluxe_ctx->seq_mat,subset_size,dummy_idx,subset_size,dummy_idx,fill_vals,INSERT_VALUES);CHKERRQ(ierr);
916*34a97f8cSStefano Zampini     ierr = MatSetValues(global_schur_subsets,subset_size,&all_local_idx_G[local_size],subset_size,&all_local_idx_G[local_size],fill_vals,ADD_VALUES);CHKERRQ(ierr);
917*34a97f8cSStefano Zampini     local_size += subset_size;
918*34a97f8cSStefano Zampini   }
919*34a97f8cSStefano Zampini   ierr = MatAssemblyBegin(deluxe_ctx->seq_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
920*34a97f8cSStefano Zampini   ierr = MatAssemblyEnd(deluxe_ctx->seq_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
921*34a97f8cSStefano Zampini   ierr = MatAssemblyBegin(global_schur_subsets,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
922*34a97f8cSStefano Zampini   ierr = MatAssemblyEnd(global_schur_subsets,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
923*34a97f8cSStefano Zampini   ierr = PetscFree2(dummy_idx,fill_vals);CHKERRQ(ierr);
924*34a97f8cSStefano Zampini 
925*34a97f8cSStefano Zampini   /* Create work vectors for sequential part of deluxe */
926*34a97f8cSStefano Zampini   ierr = MatGetVecs(deluxe_ctx->seq_mat,&deluxe_ctx->seq_work1,&deluxe_ctx->seq_work2);CHKERRQ(ierr);
927*34a97f8cSStefano Zampini 
928*34a97f8cSStefano Zampini   /* Compute deluxe sequential scatter */
929*34a97f8cSStefano Zampini   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&is_from);CHKERRQ(ierr);
930*34a97f8cSStefano Zampini   ierr = VecScatterCreate(pcbddc->work_scaling,is_from,deluxe_ctx->seq_work1,NULL,&deluxe_ctx->seq_scctx);CHKERRQ(ierr);
931*34a97f8cSStefano Zampini   ierr = ISDestroy(&is_from);CHKERRQ(ierr);
932*34a97f8cSStefano Zampini 
933*34a97f8cSStefano Zampini   /* Get local part of (\sum_j S_Ej) */
934*34a97f8cSStefano Zampini   for (i=0;i<local_size;i++) {
935*34a97f8cSStefano Zampini     all_local_idx_N[i] = all_local_idx_G[all_permutation_G[i]];
936*34a97f8cSStefano Zampini   }
937*34a97f8cSStefano Zampini   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),local_size,all_local_idx_N,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
938*34a97f8cSStefano Zampini   ierr = MatGetSubMatrices(global_schur_subsets,1,&is_to,&is_to,MAT_INITIAL_MATRIX,&submat_global_schur_subsets);CHKERRQ(ierr);
939*34a97f8cSStefano Zampini   ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr);
940*34a97f8cSStefano Zampini   ierr = ISDestroy(&is_to);CHKERRQ(ierr);
941*34a97f8cSStefano Zampini   for (i=0;i<local_size;i++) {
942*34a97f8cSStefano Zampini     all_local_idx_G[all_permutation_G[i]] = i;
943*34a97f8cSStefano Zampini   }
944*34a97f8cSStefano Zampini   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_G,PETSC_OWN_POINTER,&is_from);CHKERRQ(ierr);
945*34a97f8cSStefano Zampini   ierr = ISSetPermutation(is_from);CHKERRQ(ierr);
946*34a97f8cSStefano Zampini   ierr = MatPermute(submat_global_schur_subsets[0],is_from,is_from,&work_mat);CHKERRQ(ierr);
947*34a97f8cSStefano Zampini   ierr = MatDestroyMatrices(1,&submat_global_schur_subsets);CHKERRQ(ierr);
948*34a97f8cSStefano Zampini   ierr = ISDestroy(&is_from);CHKERRQ(ierr);
949*34a97f8cSStefano Zampini   ierr = PetscFree(all_permutation_G);CHKERRQ(ierr);
950*34a97f8cSStefano Zampini 
951*34a97f8cSStefano Zampini   /* Create KSP object for sequential part of deluxe scaling */
952*34a97f8cSStefano Zampini   ierr = KSPCreate(PETSC_COMM_SELF,&deluxe_ctx->seq_ksp);CHKERRQ(ierr);
953*34a97f8cSStefano Zampini   ierr = KSPSetOperators(deluxe_ctx->seq_ksp,work_mat,work_mat);CHKERRQ(ierr);
954*34a97f8cSStefano Zampini   ierr = KSPSetType(deluxe_ctx->seq_ksp,KSPPREONLY);CHKERRQ(ierr);
955*34a97f8cSStefano Zampini   ierr = PetscStrlen(((PetscObject)(pcbddc->ksp_D))->prefix,&len);CHKERRQ(ierr);
956*34a97f8cSStefano Zampini   len -= 10; /* remove "dirichlet_" */
957*34a97f8cSStefano Zampini   ierr = PetscStrncpy(ksp_prefix,((PetscObject)(pcbddc->ksp_D))->prefix,len);CHKERRQ(ierr);
958*34a97f8cSStefano Zampini   *(ksp_prefix+len)='\0';
959*34a97f8cSStefano Zampini   ierr = PetscStrcat(ksp_prefix,"deluxe_seq_");CHKERRQ(ierr);
960*34a97f8cSStefano Zampini   ierr = KSPSetOptionsPrefix(deluxe_ctx->seq_ksp,ksp_prefix);CHKERRQ(ierr);
961*34a97f8cSStefano Zampini   ierr = KSPSetFromOptions(deluxe_ctx->seq_ksp);CHKERRQ(ierr);
962*34a97f8cSStefano Zampini   ierr = KSPSetUp(deluxe_ctx->seq_ksp);CHKERRQ(ierr);
963*34a97f8cSStefano Zampini   ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] PREFIX SEQ is %s\n",PetscGlobalRank,((PetscObject)deluxe_ctx->seq_ksp)->prefix);CHKERRQ(ierr);
964*34a97f8cSStefano Zampini   ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
965*34a97f8cSStefano Zampini   PetscFunctionReturn(0);
966*34a97f8cSStefano Zampini }
967