xref: /petsc/src/mat/graphops/coarsen/impls/hem/hem.c (revision 8be712e46db5d855f641c6bd97b4543e0efe65bd)
1*8be712e4SBarry Smith #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/
2*8be712e4SBarry Smith #include <../src/mat/impls/aij/seq/aij.h>
3*8be712e4SBarry Smith #include <../src/mat/impls/aij/mpi/mpiaij.h>
4*8be712e4SBarry Smith 
5*8be712e4SBarry Smith /* linked list methods
6*8be712e4SBarry Smith  *
7*8be712e4SBarry Smith  *  PetscCDCreate
8*8be712e4SBarry Smith  */
9*8be712e4SBarry Smith PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out)
10*8be712e4SBarry Smith {
11*8be712e4SBarry Smith   PetscCoarsenData *ail;
12*8be712e4SBarry Smith 
13*8be712e4SBarry Smith   PetscFunctionBegin;
14*8be712e4SBarry Smith   /* allocate pool, partially */
15*8be712e4SBarry Smith   PetscCall(PetscNew(&ail));
16*8be712e4SBarry Smith   *a_out               = ail;
17*8be712e4SBarry Smith   ail->pool_list.next  = NULL;
18*8be712e4SBarry Smith   ail->pool_list.array = NULL;
19*8be712e4SBarry Smith   ail->chk_sz          = 0;
20*8be712e4SBarry Smith   /* allocate array */
21*8be712e4SBarry Smith   ail->size = a_size;
22*8be712e4SBarry Smith   PetscCall(PetscCalloc1(a_size, &ail->array));
23*8be712e4SBarry Smith   ail->extra_nodes = NULL;
24*8be712e4SBarry Smith   ail->mat         = NULL;
25*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
26*8be712e4SBarry Smith }
27*8be712e4SBarry Smith 
28*8be712e4SBarry Smith /* PetscCDDestroy
29*8be712e4SBarry Smith  */
30*8be712e4SBarry Smith PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail)
31*8be712e4SBarry Smith {
32*8be712e4SBarry Smith   PetscCDArrNd *n = &ail->pool_list;
33*8be712e4SBarry Smith 
34*8be712e4SBarry Smith   PetscFunctionBegin;
35*8be712e4SBarry Smith   n = n->next;
36*8be712e4SBarry Smith   while (n) {
37*8be712e4SBarry Smith     PetscCDArrNd *lstn = n;
38*8be712e4SBarry Smith     n                  = n->next;
39*8be712e4SBarry Smith     PetscCall(PetscFree(lstn));
40*8be712e4SBarry Smith   }
41*8be712e4SBarry Smith   if (ail->pool_list.array) PetscCall(PetscFree(ail->pool_list.array));
42*8be712e4SBarry Smith   PetscCall(PetscFree(ail->array));
43*8be712e4SBarry Smith   if (ail->mat) PetscCall(MatDestroy(&ail->mat));
44*8be712e4SBarry Smith   /* delete this (+agg+pool array) */
45*8be712e4SBarry Smith   PetscCall(PetscFree(ail));
46*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
47*8be712e4SBarry Smith }
48*8be712e4SBarry Smith 
49*8be712e4SBarry Smith /* PetscCDSetChunkSize
50*8be712e4SBarry Smith  */
51*8be712e4SBarry Smith PetscErrorCode PetscCDSetChunkSize(PetscCoarsenData *ail, PetscInt a_sz)
52*8be712e4SBarry Smith {
53*8be712e4SBarry Smith   PetscFunctionBegin;
54*8be712e4SBarry Smith   ail->chk_sz = a_sz;
55*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
56*8be712e4SBarry Smith }
57*8be712e4SBarry Smith 
58*8be712e4SBarry Smith /*  PetscCDGetNewNode
59*8be712e4SBarry Smith  */
60*8be712e4SBarry Smith static PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id)
61*8be712e4SBarry Smith {
62*8be712e4SBarry Smith   PetscFunctionBegin;
63*8be712e4SBarry Smith   *a_out = NULL; /* squelch -Wmaybe-uninitialized */
64*8be712e4SBarry Smith   if (ail->extra_nodes) {
65*8be712e4SBarry Smith     PetscCDIntNd *node = ail->extra_nodes;
66*8be712e4SBarry Smith     ail->extra_nodes   = node->next;
67*8be712e4SBarry Smith     node->gid          = a_id;
68*8be712e4SBarry Smith     node->next         = NULL;
69*8be712e4SBarry Smith     *a_out             = node;
70*8be712e4SBarry Smith   } else {
71*8be712e4SBarry Smith     if (!ail->pool_list.array) {
72*8be712e4SBarry Smith       if (!ail->chk_sz) ail->chk_sz = 10; /* use a chuck size of ail->size? */
73*8be712e4SBarry Smith       PetscCall(PetscMalloc1(ail->chk_sz, &ail->pool_list.array));
74*8be712e4SBarry Smith       ail->new_node       = ail->pool_list.array;
75*8be712e4SBarry Smith       ail->new_left       = ail->chk_sz;
76*8be712e4SBarry Smith       ail->new_node->next = NULL;
77*8be712e4SBarry Smith     } else if (!ail->new_left) {
78*8be712e4SBarry Smith       PetscCDArrNd *node;
79*8be712e4SBarry Smith       PetscCall(PetscMalloc(ail->chk_sz * sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node));
80*8be712e4SBarry Smith       node->array         = (PetscCDIntNd *)(node + 1);
81*8be712e4SBarry Smith       node->next          = ail->pool_list.next;
82*8be712e4SBarry Smith       ail->pool_list.next = node;
83*8be712e4SBarry Smith       ail->new_left       = ail->chk_sz;
84*8be712e4SBarry Smith       ail->new_node       = node->array;
85*8be712e4SBarry Smith     }
86*8be712e4SBarry Smith     ail->new_node->gid  = a_id;
87*8be712e4SBarry Smith     ail->new_node->next = NULL;
88*8be712e4SBarry Smith     *a_out              = ail->new_node++;
89*8be712e4SBarry Smith     ail->new_left--;
90*8be712e4SBarry Smith   }
91*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
92*8be712e4SBarry Smith }
93*8be712e4SBarry Smith 
94*8be712e4SBarry Smith /* PetscCDIntNdSetID
95*8be712e4SBarry Smith  */
96*8be712e4SBarry Smith PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id)
97*8be712e4SBarry Smith {
98*8be712e4SBarry Smith   PetscFunctionBegin;
99*8be712e4SBarry Smith   a_this->gid = a_id;
100*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
101*8be712e4SBarry Smith }
102*8be712e4SBarry Smith 
103*8be712e4SBarry Smith /* PetscCDIntNdGetID
104*8be712e4SBarry Smith  */
105*8be712e4SBarry Smith PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid)
106*8be712e4SBarry Smith {
107*8be712e4SBarry Smith   PetscFunctionBegin;
108*8be712e4SBarry Smith   *a_gid = a_this->gid;
109*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
110*8be712e4SBarry Smith }
111*8be712e4SBarry Smith 
112*8be712e4SBarry Smith /* PetscCDGetHeadPos
113*8be712e4SBarry Smith  */
114*8be712e4SBarry Smith PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos)
115*8be712e4SBarry Smith {
116*8be712e4SBarry Smith   PetscFunctionBegin;
117*8be712e4SBarry Smith   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_idx >= ail->size: a_idx=%" PetscInt_FMT ".", a_idx);
118*8be712e4SBarry Smith   *pos = ail->array[a_idx];
119*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
120*8be712e4SBarry Smith }
121*8be712e4SBarry Smith 
122*8be712e4SBarry Smith /* PetscCDGetNextPos
123*8be712e4SBarry Smith  */
124*8be712e4SBarry Smith PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos)
125*8be712e4SBarry Smith {
126*8be712e4SBarry Smith   PetscFunctionBegin;
127*8be712e4SBarry Smith   PetscCheck((*pos), PETSC_COMM_SELF, PETSC_ERR_PLIB, "NULL input position.");
128*8be712e4SBarry Smith   *pos = (*pos)->next;
129*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
130*8be712e4SBarry Smith }
131*8be712e4SBarry Smith 
132*8be712e4SBarry Smith /* PetscCDAppendID
133*8be712e4SBarry Smith  */
134*8be712e4SBarry Smith PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id)
135*8be712e4SBarry Smith {
136*8be712e4SBarry Smith   PetscCDIntNd *n, *n2;
137*8be712e4SBarry Smith 
138*8be712e4SBarry Smith   PetscFunctionBegin;
139*8be712e4SBarry Smith   PetscCall(PetscCDGetNewNode(ail, &n, a_id));
140*8be712e4SBarry Smith   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
141*8be712e4SBarry Smith   if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = n;
142*8be712e4SBarry Smith   else {
143*8be712e4SBarry Smith     do {
144*8be712e4SBarry Smith       if (!n2->next) {
145*8be712e4SBarry Smith         n2->next = n;
146*8be712e4SBarry Smith         PetscCheck(!n->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n should not have a next");
147*8be712e4SBarry Smith         break;
148*8be712e4SBarry Smith       }
149*8be712e4SBarry Smith       n2 = n2->next;
150*8be712e4SBarry Smith     } while (n2);
151*8be712e4SBarry Smith     PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
152*8be712e4SBarry Smith   }
153*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
154*8be712e4SBarry Smith }
155*8be712e4SBarry Smith 
156*8be712e4SBarry Smith /* PetscCDAppendNode
157*8be712e4SBarry Smith  */
158*8be712e4SBarry Smith PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n)
159*8be712e4SBarry Smith {
160*8be712e4SBarry Smith   PetscCDIntNd *n2;
161*8be712e4SBarry Smith 
162*8be712e4SBarry Smith   PetscFunctionBegin;
163*8be712e4SBarry Smith   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
164*8be712e4SBarry Smith   if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = a_n;
165*8be712e4SBarry Smith   else {
166*8be712e4SBarry Smith     do {
167*8be712e4SBarry Smith       if (!n2->next) {
168*8be712e4SBarry Smith         n2->next  = a_n;
169*8be712e4SBarry Smith         a_n->next = NULL;
170*8be712e4SBarry Smith         break;
171*8be712e4SBarry Smith       }
172*8be712e4SBarry Smith       n2 = n2->next;
173*8be712e4SBarry Smith     } while (n2);
174*8be712e4SBarry Smith     PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
175*8be712e4SBarry Smith   }
176*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
177*8be712e4SBarry Smith }
178*8be712e4SBarry Smith 
179*8be712e4SBarry Smith /* PetscCDRemoveNextNode: a_last->next, this exposes single linked list structure to API (not used)
180*8be712e4SBarry Smith  */
181*8be712e4SBarry Smith PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last)
182*8be712e4SBarry Smith {
183*8be712e4SBarry Smith   PetscCDIntNd *del;
184*8be712e4SBarry Smith 
185*8be712e4SBarry Smith   PetscFunctionBegin;
186*8be712e4SBarry Smith   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
187*8be712e4SBarry Smith   PetscCheck(a_last->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_last should have a next");
188*8be712e4SBarry Smith   del          = a_last->next;
189*8be712e4SBarry Smith   a_last->next = del->next;
190*8be712e4SBarry Smith   /* del->next = NULL; -- this still used in a iterator so keep it intact -- need to fix this with a double linked list */
191*8be712e4SBarry Smith   /* could reuse n2 but PetscCDAppendNode sometimes uses it */
192*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
193*8be712e4SBarry Smith }
194*8be712e4SBarry Smith 
195*8be712e4SBarry Smith /* PetscCDPrint
196*8be712e4SBarry Smith  */
197*8be712e4SBarry Smith PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, PetscInt my0, MPI_Comm comm)
198*8be712e4SBarry Smith {
199*8be712e4SBarry Smith   PetscCDIntNd *n, *n2;
200*8be712e4SBarry Smith   PetscInt      ii;
201*8be712e4SBarry Smith 
202*8be712e4SBarry Smith   PetscFunctionBegin;
203*8be712e4SBarry Smith   for (ii = 0; ii < ail->size; ii++) {
204*8be712e4SBarry Smith     n2 = n = ail->array[ii];
205*8be712e4SBarry Smith     if (n) PetscCall(PetscSynchronizedPrintf(comm, "list %" PetscInt_FMT ":", ii + my0));
206*8be712e4SBarry Smith     while (n) {
207*8be712e4SBarry Smith       PetscCall(PetscSynchronizedPrintf(comm, " %" PetscInt_FMT, n->gid));
208*8be712e4SBarry Smith       n = n->next;
209*8be712e4SBarry Smith     }
210*8be712e4SBarry Smith     if (n2) PetscCall(PetscSynchronizedPrintf(comm, "\n"));
211*8be712e4SBarry Smith   }
212*8be712e4SBarry Smith   PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT));
213*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
214*8be712e4SBarry Smith }
215*8be712e4SBarry Smith 
216*8be712e4SBarry Smith /* PetscCDMoveAppend - take list in a_srcidx and appends to destidx
217*8be712e4SBarry Smith  */
218*8be712e4SBarry Smith PetscErrorCode PetscCDMoveAppend(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx)
219*8be712e4SBarry Smith {
220*8be712e4SBarry Smith   PetscCDIntNd *n;
221*8be712e4SBarry Smith 
222*8be712e4SBarry Smith   PetscFunctionBegin;
223*8be712e4SBarry Smith   PetscCheck(a_srcidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_srcidx);
224*8be712e4SBarry Smith   PetscCheck(a_destidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_destidx);
225*8be712e4SBarry Smith   PetscCheck(a_destidx != a_srcidx, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_destidx==a_srcidx %" PetscInt_FMT ".", a_destidx);
226*8be712e4SBarry Smith   n = ail->array[a_destidx];
227*8be712e4SBarry Smith   if (!n) ail->array[a_destidx] = ail->array[a_srcidx];
228*8be712e4SBarry Smith   else {
229*8be712e4SBarry Smith     do {
230*8be712e4SBarry Smith       if (!n->next) {
231*8be712e4SBarry Smith         n->next = ail->array[a_srcidx]; // append
232*8be712e4SBarry Smith         break;
233*8be712e4SBarry Smith       }
234*8be712e4SBarry Smith       n = n->next;
235*8be712e4SBarry Smith     } while (1);
236*8be712e4SBarry Smith   }
237*8be712e4SBarry Smith   ail->array[a_srcidx] = NULL; // empty
238*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
239*8be712e4SBarry Smith }
240*8be712e4SBarry Smith 
241*8be712e4SBarry Smith /* PetscCDRemoveAllAt - empty one list and move data to cache
242*8be712e4SBarry Smith  */
243*8be712e4SBarry Smith PetscErrorCode PetscCDRemoveAllAt(PetscCoarsenData *ail, PetscInt a_idx)
244*8be712e4SBarry Smith {
245*8be712e4SBarry Smith   PetscCDIntNd *rem, *n1;
246*8be712e4SBarry Smith 
247*8be712e4SBarry Smith   PetscFunctionBegin;
248*8be712e4SBarry Smith   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
249*8be712e4SBarry Smith   rem               = ail->array[a_idx];
250*8be712e4SBarry Smith   ail->array[a_idx] = NULL;
251*8be712e4SBarry Smith   if (!(n1 = ail->extra_nodes)) ail->extra_nodes = rem;
252*8be712e4SBarry Smith   else {
253*8be712e4SBarry Smith     while (n1->next) n1 = n1->next;
254*8be712e4SBarry Smith     n1->next = rem;
255*8be712e4SBarry Smith   }
256*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
257*8be712e4SBarry Smith }
258*8be712e4SBarry Smith 
259*8be712e4SBarry Smith /* PetscCDCountAt
260*8be712e4SBarry Smith  */
261*8be712e4SBarry Smith PetscErrorCode PetscCDCountAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz)
262*8be712e4SBarry Smith {
263*8be712e4SBarry Smith   PetscCDIntNd *n1;
264*8be712e4SBarry Smith   PetscInt      sz = 0;
265*8be712e4SBarry Smith 
266*8be712e4SBarry Smith   PetscFunctionBegin;
267*8be712e4SBarry Smith   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
268*8be712e4SBarry Smith   n1 = ail->array[a_idx];
269*8be712e4SBarry Smith   while (n1) {
270*8be712e4SBarry Smith     n1 = n1->next;
271*8be712e4SBarry Smith     sz++;
272*8be712e4SBarry Smith   }
273*8be712e4SBarry Smith   *a_sz = sz;
274*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
275*8be712e4SBarry Smith }
276*8be712e4SBarry Smith 
277*8be712e4SBarry Smith /* PetscCDSize
278*8be712e4SBarry Smith  */
279*8be712e4SBarry Smith PetscErrorCode PetscCDCount(const PetscCoarsenData *ail, PetscInt *a_sz)
280*8be712e4SBarry Smith {
281*8be712e4SBarry Smith   PetscInt sz = 0;
282*8be712e4SBarry Smith 
283*8be712e4SBarry Smith   PetscFunctionBegin;
284*8be712e4SBarry Smith   for (int ii = 0; ii < ail->size; ii++) {
285*8be712e4SBarry Smith     PetscCDIntNd *n1 = ail->array[ii];
286*8be712e4SBarry Smith     while (n1) {
287*8be712e4SBarry Smith       n1 = n1->next;
288*8be712e4SBarry Smith       sz++;
289*8be712e4SBarry Smith     }
290*8be712e4SBarry Smith   }
291*8be712e4SBarry Smith   *a_sz = sz;
292*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
293*8be712e4SBarry Smith }
294*8be712e4SBarry Smith 
295*8be712e4SBarry Smith /* PetscCDIsEmptyAt - Is the list empty? (not used)
296*8be712e4SBarry Smith  */
297*8be712e4SBarry Smith PetscErrorCode PetscCDIsEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e)
298*8be712e4SBarry Smith {
299*8be712e4SBarry Smith   PetscFunctionBegin;
300*8be712e4SBarry Smith   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
301*8be712e4SBarry Smith   *a_e = (PetscBool)(ail->array[a_idx] == NULL);
302*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
303*8be712e4SBarry Smith }
304*8be712e4SBarry Smith 
305*8be712e4SBarry Smith /* PetscCDGetNonemptyIS - used for C-F methods
306*8be712e4SBarry Smith  */
307*8be712e4SBarry Smith PetscErrorCode PetscCDGetNonemptyIS(PetscCoarsenData *ail, IS *a_mis)
308*8be712e4SBarry Smith {
309*8be712e4SBarry Smith   PetscCDIntNd *n;
310*8be712e4SBarry Smith   PetscInt      ii, kk;
311*8be712e4SBarry Smith   PetscInt     *permute;
312*8be712e4SBarry Smith 
313*8be712e4SBarry Smith   PetscFunctionBegin;
314*8be712e4SBarry Smith   for (ii = kk = 0; ii < ail->size; ii++) {
315*8be712e4SBarry Smith     n = ail->array[ii];
316*8be712e4SBarry Smith     if (n) kk++;
317*8be712e4SBarry Smith   }
318*8be712e4SBarry Smith   PetscCall(PetscMalloc1(kk, &permute));
319*8be712e4SBarry Smith   for (ii = kk = 0; ii < ail->size; ii++) {
320*8be712e4SBarry Smith     n = ail->array[ii];
321*8be712e4SBarry Smith     if (n) permute[kk++] = ii;
322*8be712e4SBarry Smith   }
323*8be712e4SBarry Smith   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis));
324*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
325*8be712e4SBarry Smith }
326*8be712e4SBarry Smith 
327*8be712e4SBarry Smith /* PetscCDGetMat
328*8be712e4SBarry Smith  */
329*8be712e4SBarry Smith PetscErrorCode PetscCDGetMat(PetscCoarsenData *ail, Mat *a_mat)
330*8be712e4SBarry Smith {
331*8be712e4SBarry Smith   PetscFunctionBegin;
332*8be712e4SBarry Smith   *a_mat   = ail->mat;
333*8be712e4SBarry Smith   ail->mat = NULL; // give it up
334*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
335*8be712e4SBarry Smith }
336*8be712e4SBarry Smith 
337*8be712e4SBarry Smith /* PetscCDSetMat
338*8be712e4SBarry Smith  */
339*8be712e4SBarry Smith PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat)
340*8be712e4SBarry Smith {
341*8be712e4SBarry Smith   PetscFunctionBegin;
342*8be712e4SBarry Smith   if (ail->mat) {
343*8be712e4SBarry Smith     PetscCall(MatDestroy(&ail->mat)); //should not happen
344*8be712e4SBarry Smith   }
345*8be712e4SBarry Smith   ail->mat = a_mat;
346*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
347*8be712e4SBarry Smith }
348*8be712e4SBarry Smith 
349*8be712e4SBarry Smith /* PetscCDGetASMBlocks - get IS of aggregates for ASM smoothers
350*8be712e4SBarry Smith  */
351*8be712e4SBarry Smith PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, PetscInt *a_sz, IS **a_local_is)
352*8be712e4SBarry Smith {
353*8be712e4SBarry Smith   PetscCDIntNd *n;
354*8be712e4SBarry Smith   PetscInt      lsz, ii, kk, *idxs, jj, gid;
355*8be712e4SBarry Smith   IS           *is_loc = NULL;
356*8be712e4SBarry Smith 
357*8be712e4SBarry Smith   PetscFunctionBegin;
358*8be712e4SBarry Smith   for (ii = kk = 0; ii < ail->size; ii++) {
359*8be712e4SBarry Smith     if (ail->array[ii]) kk++;
360*8be712e4SBarry Smith   }
361*8be712e4SBarry Smith   *a_sz = kk;
362*8be712e4SBarry Smith   PetscCall(PetscMalloc1(kk, &is_loc));
363*8be712e4SBarry Smith   for (ii = kk = 0; ii < ail->size; ii++) {
364*8be712e4SBarry Smith     for (lsz = 0, n = ail->array[ii]; n; lsz++, n = n->next) /* void */
365*8be712e4SBarry Smith       ;
366*8be712e4SBarry Smith     if (lsz) {
367*8be712e4SBarry Smith       PetscCall(PetscMalloc1(a_bs * lsz, &idxs));
368*8be712e4SBarry Smith       for (lsz = 0, n = ail->array[ii]; n; n = n->next) {
369*8be712e4SBarry Smith         PetscCall(PetscCDIntNdGetID(n, &gid));
370*8be712e4SBarry Smith         for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj;
371*8be712e4SBarry Smith       }
372*8be712e4SBarry Smith       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++]));
373*8be712e4SBarry Smith     }
374*8be712e4SBarry Smith   }
375*8be712e4SBarry Smith   PetscCheck(*a_sz == kk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "*a_sz %" PetscInt_FMT " != kk %" PetscInt_FMT, *a_sz, kk);
376*8be712e4SBarry Smith   *a_local_is = is_loc; /* out */
377*8be712e4SBarry Smith 
378*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
379*8be712e4SBarry Smith }
380*8be712e4SBarry Smith 
381*8be712e4SBarry Smith /* edge for priority queue */
382*8be712e4SBarry Smith typedef struct edge_tag {
383*8be712e4SBarry Smith   PetscReal weight;
384*8be712e4SBarry Smith   PetscInt  lid0, gid1, ghost1_idx;
385*8be712e4SBarry Smith } Edge;
386*8be712e4SBarry Smith 
387*8be712e4SBarry Smith #define MY_MEPS (PETSC_MACHINE_EPSILON * 100)
388*8be712e4SBarry Smith static int gamg_hem_compare(const void *a, const void *b)
389*8be712e4SBarry Smith {
390*8be712e4SBarry Smith   PetscReal va = ((Edge *)a)->weight, vb = ((Edge *)b)->weight;
391*8be712e4SBarry Smith   return (va <= vb - MY_MEPS) ? 1 : (va > vb + MY_MEPS) ? -1 : 0; /* 0 for equal */
392*8be712e4SBarry Smith }
393*8be712e4SBarry Smith /* static int gamg_hem_compare3(const void *a, const void *b, void *ctx) */
394*8be712e4SBarry Smith /* { */
395*8be712e4SBarry Smith /*   return gamg_hem_compare(a, b); */
396*8be712e4SBarry Smith /* } */
397*8be712e4SBarry Smith 
398*8be712e4SBarry Smith /*
399*8be712e4SBarry Smith   MatCoarsenApply_HEM_private - parallel heavy edge matching
400*8be712e4SBarry Smith 
401*8be712e4SBarry Smith   Input Parameter:
402*8be712e4SBarry Smith    . a_Gmat - global matrix of the graph
403*8be712e4SBarry Smith    . n_iter - number of matching iterations
404*8be712e4SBarry Smith    . threshold - threshold for filtering graphs
405*8be712e4SBarry Smith 
406*8be712e4SBarry Smith   Output Parameter:
407*8be712e4SBarry Smith    . a_locals_llist - array of list of local nodes rooted at local node
408*8be712e4SBarry Smith */
409*8be712e4SBarry Smith static PetscErrorCode MatCoarsenApply_HEM_private(Mat a_Gmat, const PetscInt n_iter, const PetscReal threshold, PetscCoarsenData **a_locals_llist)
410*8be712e4SBarry Smith {
411*8be712e4SBarry Smith #define REQ_BF_SIZE 100
412*8be712e4SBarry Smith   PetscBool         isMPI;
413*8be712e4SBarry Smith   MPI_Comm          comm;
414*8be712e4SBarry Smith   PetscInt          ix, *ii, *aj, Iend, my0, ncomm_procs, bc_agg = -1, *rbuff = NULL, rbuff_sz = 0;
415*8be712e4SBarry Smith   PetscMPIInt       rank, size, comm_procs[REQ_BF_SIZE], *lid_max_pe;
416*8be712e4SBarry Smith   const PetscInt    nloc = a_Gmat->rmap->n, request_size = PetscCeilReal((PetscReal)sizeof(MPI_Request) / (PetscReal)sizeof(PetscInt));
417*8be712e4SBarry Smith   PetscInt         *lid_cprowID;
418*8be712e4SBarry Smith   PetscBool        *lid_matched;
419*8be712e4SBarry Smith   Mat_SeqAIJ       *matA, *matB = NULL;
420*8be712e4SBarry Smith   Mat_MPIAIJ       *mpimat     = NULL;
421*8be712e4SBarry Smith   PetscScalar       one        = 1.;
422*8be712e4SBarry Smith   PetscCoarsenData *agg_llists = NULL, *ghost_deleted_list = NULL, *bc_list = NULL;
423*8be712e4SBarry Smith   Mat               cMat, tMat, P;
424*8be712e4SBarry Smith   MatScalar        *ap;
425*8be712e4SBarry Smith   IS                info_is;
426*8be712e4SBarry Smith 
427*8be712e4SBarry Smith   PetscFunctionBegin;
428*8be712e4SBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)a_Gmat, &comm));
429*8be712e4SBarry Smith   PetscCallMPI(MPI_Comm_rank(comm, &rank));
430*8be712e4SBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
431*8be712e4SBarry Smith   PetscCall(MatGetOwnershipRange(a_Gmat, &my0, &Iend));
432*8be712e4SBarry Smith   PetscCall(ISCreate(comm, &info_is));
433*8be712e4SBarry Smith   PetscCall(PetscInfo(info_is, "%" PetscInt_FMT " iterations of HEM.\n", n_iter));
434*8be712e4SBarry Smith 
435*8be712e4SBarry Smith   PetscCall(PetscMalloc1(nloc, &lid_matched));
436*8be712e4SBarry Smith   PetscCall(PetscMalloc1(nloc, &lid_cprowID));
437*8be712e4SBarry Smith   PetscCall(PetscMalloc1(nloc, &lid_max_pe));
438*8be712e4SBarry Smith 
439*8be712e4SBarry Smith   PetscCall(PetscCDCreate(nloc, &agg_llists));
440*8be712e4SBarry Smith   PetscCall(PetscCDSetChunkSize(agg_llists, nloc + 1));
441*8be712e4SBarry Smith   *a_locals_llist = agg_llists;
442*8be712e4SBarry Smith   /* add self to all lists */
443*8be712e4SBarry Smith   for (int kk = 0; kk < nloc; kk++) PetscCall(PetscCDAppendID(agg_llists, kk, my0 + kk));
444*8be712e4SBarry Smith   /* make a copy of the graph, this gets destroyed in iterates */
445*8be712e4SBarry Smith   PetscCall(MatDuplicate(a_Gmat, MAT_COPY_VALUES, &cMat));
446*8be712e4SBarry Smith   PetscCall(MatConvert(cMat, MATAIJ, MAT_INPLACE_MATRIX, &cMat));
447*8be712e4SBarry Smith   isMPI = (PetscBool)(size > 1);
448*8be712e4SBarry Smith   if (isMPI) {
449*8be712e4SBarry Smith     /* list of deleted ghosts, should compress this */
450*8be712e4SBarry Smith     PetscCall(PetscCDCreate(size, &ghost_deleted_list));
451*8be712e4SBarry Smith     PetscCall(PetscCDSetChunkSize(ghost_deleted_list, 100));
452*8be712e4SBarry Smith   }
453*8be712e4SBarry Smith   for (int iter = 0; iter < n_iter; iter++) {
454*8be712e4SBarry Smith     const PetscScalar *lghost_max_ew, *lid_max_ew;
455*8be712e4SBarry Smith     PetscBool         *lghost_matched;
456*8be712e4SBarry Smith     PetscMPIInt       *lghost_pe, *lghost_max_pe;
457*8be712e4SBarry Smith     Vec                locMaxEdge, ghostMaxEdge, ghostMaxPE, locMaxPE;
458*8be712e4SBarry Smith     PetscInt          *lghost_gid, nEdges, nEdges0, num_ghosts = 0;
459*8be712e4SBarry Smith     Edge              *Edges;
460*8be712e4SBarry Smith     const int          n_sub_its = 2500; // in case of a bug, stop at some point
461*8be712e4SBarry Smith     /* get submatrices of cMat */
462*8be712e4SBarry Smith     for (int kk = 0; kk < nloc; kk++) lid_cprowID[kk] = -1;
463*8be712e4SBarry Smith     if (isMPI) {
464*8be712e4SBarry Smith       mpimat = (Mat_MPIAIJ *)cMat->data;
465*8be712e4SBarry Smith       matA   = (Mat_SeqAIJ *)mpimat->A->data;
466*8be712e4SBarry Smith       matB   = (Mat_SeqAIJ *)mpimat->B->data;
467*8be712e4SBarry Smith       if (!matB->compressedrow.use) {
468*8be712e4SBarry Smith         /* force construction of compressed row data structure since code below requires it */
469*8be712e4SBarry Smith         PetscCall(MatCheckCompressedRow(mpimat->B, matB->nonzerorowcnt, &matB->compressedrow, matB->i, mpimat->B->rmap->n, -1.0));
470*8be712e4SBarry Smith       }
471*8be712e4SBarry Smith       /* set index into compressed row 'lid_cprowID' */
472*8be712e4SBarry Smith       for (ix = 0; ix < matB->compressedrow.nrows; ix++) {
473*8be712e4SBarry Smith         PetscInt *ridx = matB->compressedrow.rindex, lid = ridx[ix];
474*8be712e4SBarry Smith         if (ridx[ix] >= 0) lid_cprowID[lid] = ix;
475*8be712e4SBarry Smith         else PetscCall(PetscPrintf(PETSC_COMM_SELF, "Missing slot in cprow? %d/%d ", (int)ix, (int)matB->compressedrow.nrows));
476*8be712e4SBarry Smith       }
477*8be712e4SBarry Smith     } else {
478*8be712e4SBarry Smith       matA = (Mat_SeqAIJ *)cMat->data;
479*8be712e4SBarry Smith     }
480*8be712e4SBarry Smith     /* set matched flags: true for empty list */
481*8be712e4SBarry Smith     for (int kk = 0; kk < nloc; kk++) {
482*8be712e4SBarry Smith       PetscCall(PetscCDCountAt(agg_llists, kk, &ix));
483*8be712e4SBarry Smith       if (ix > 0) lid_matched[kk] = PETSC_FALSE;
484*8be712e4SBarry Smith       else lid_matched[kk] = PETSC_TRUE; // call deleted gids as matched
485*8be712e4SBarry Smith     }
486*8be712e4SBarry Smith     /* max edge and pe vecs */
487*8be712e4SBarry Smith     PetscCall(MatCreateVecs(cMat, &locMaxEdge, NULL));
488*8be712e4SBarry Smith     PetscCall(MatCreateVecs(cMat, &locMaxPE, NULL));
489*8be712e4SBarry Smith     /* get 'lghost_pe' & 'lghost_gid' & init. 'lghost_matched' using 'mpimat->lvec' */
490*8be712e4SBarry Smith     if (isMPI) {
491*8be712e4SBarry Smith       Vec                vec;
492*8be712e4SBarry Smith       PetscScalar        vval;
493*8be712e4SBarry Smith       const PetscScalar *buf;
494*8be712e4SBarry Smith       PetscCall(MatCreateVecs(cMat, &vec, NULL));
495*8be712e4SBarry Smith       PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts));
496*8be712e4SBarry Smith       /* lghost_matched */
497*8be712e4SBarry Smith       for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
498*8be712e4SBarry Smith         PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
499*8be712e4SBarry Smith         PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES));
500*8be712e4SBarry Smith       }
501*8be712e4SBarry Smith       PetscCall(VecAssemblyBegin(vec));
502*8be712e4SBarry Smith       PetscCall(VecAssemblyEnd(vec));
503*8be712e4SBarry Smith       PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
504*8be712e4SBarry Smith       PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
505*8be712e4SBarry Smith       PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */
506*8be712e4SBarry Smith       PetscCall(PetscMalloc1(num_ghosts, &lghost_matched));
507*8be712e4SBarry Smith       for (int kk = 0; kk < num_ghosts; kk++) {
508*8be712e4SBarry Smith         lghost_matched[kk] = (PetscBool)(PetscRealPart(buf[kk]) != 0); // the proc of the ghost for now
509*8be712e4SBarry Smith       }
510*8be712e4SBarry Smith       PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
511*8be712e4SBarry Smith       /* lghost_pe */
512*8be712e4SBarry Smith       vval = (PetscScalar)(rank);
513*8be712e4SBarry Smith       for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
514*8be712e4SBarry Smith       PetscCall(VecAssemblyBegin(vec));
515*8be712e4SBarry Smith       PetscCall(VecAssemblyEnd(vec));
516*8be712e4SBarry Smith       PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
517*8be712e4SBarry Smith       PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
518*8be712e4SBarry Smith       PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */
519*8be712e4SBarry Smith       PetscCall(PetscMalloc1(num_ghosts, &lghost_pe));
520*8be712e4SBarry Smith       for (int kk = 0; kk < num_ghosts; kk++) lghost_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the proc of the ghost for now
521*8be712e4SBarry Smith       PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
522*8be712e4SBarry Smith       /* lghost_gid */
523*8be712e4SBarry Smith       for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
524*8be712e4SBarry Smith         vval = (PetscScalar)(gid);
525*8be712e4SBarry Smith         PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
526*8be712e4SBarry Smith       }
527*8be712e4SBarry Smith       PetscCall(VecAssemblyBegin(vec));
528*8be712e4SBarry Smith       PetscCall(VecAssemblyEnd(vec));
529*8be712e4SBarry Smith       PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
530*8be712e4SBarry Smith       PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
531*8be712e4SBarry Smith       PetscCall(VecDestroy(&vec));
532*8be712e4SBarry Smith       PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'lghost_gid' */
533*8be712e4SBarry Smith       PetscCall(PetscMalloc1(num_ghosts, &lghost_gid));
534*8be712e4SBarry Smith       for (int kk = 0; kk < num_ghosts; kk++) lghost_gid[kk] = (PetscInt)PetscRealPart(buf[kk]);
535*8be712e4SBarry Smith       PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
536*8be712e4SBarry Smith     }
537*8be712e4SBarry Smith     // get 'comm_procs' (could hoist)
538*8be712e4SBarry Smith     for (int kk = 0; kk < REQ_BF_SIZE; kk++) comm_procs[kk] = -1;
539*8be712e4SBarry Smith     for (ix = 0, ncomm_procs = 0; ix < num_ghosts; ix++) {
540*8be712e4SBarry Smith       PetscMPIInt proc = lghost_pe[ix], idx = -1;
541*8be712e4SBarry Smith       for (int k = 0; k < ncomm_procs && idx == -1; k++)
542*8be712e4SBarry Smith         if (comm_procs[k] == proc) idx = k;
543*8be712e4SBarry Smith       if (idx == -1) { comm_procs[ncomm_procs++] = proc; }
544*8be712e4SBarry Smith       PetscCheck(ncomm_procs != REQ_BF_SIZE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Receive request array too small: %d", (int)ncomm_procs);
545*8be712e4SBarry Smith     }
546*8be712e4SBarry Smith     /* count edges, compute initial 'locMaxEdge', 'locMaxPE' */
547*8be712e4SBarry Smith     nEdges0 = 0;
548*8be712e4SBarry Smith     for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
549*8be712e4SBarry Smith       PetscReal   max_e = 0., tt;
550*8be712e4SBarry Smith       PetscScalar vval;
551*8be712e4SBarry Smith       PetscInt    lid = kk, max_pe = rank, pe, n;
552*8be712e4SBarry Smith       ii = matA->i;
553*8be712e4SBarry Smith       n  = ii[lid + 1] - ii[lid];
554*8be712e4SBarry Smith       aj = matA->j + ii[lid];
555*8be712e4SBarry Smith       ap = matA->a + ii[lid];
556*8be712e4SBarry Smith       for (int jj = 0; jj < n; jj++) {
557*8be712e4SBarry Smith         PetscInt lidj = aj[jj];
558*8be712e4SBarry Smith         if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) {
559*8be712e4SBarry Smith           if (tt > max_e) max_e = tt;
560*8be712e4SBarry Smith           if (lidj > lid) nEdges0++;
561*8be712e4SBarry Smith         }
562*8be712e4SBarry Smith       }
563*8be712e4SBarry Smith       if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
564*8be712e4SBarry Smith         ii = matB->compressedrow.i;
565*8be712e4SBarry Smith         n  = ii[ix + 1] - ii[ix];
566*8be712e4SBarry Smith         ap = matB->a + ii[ix];
567*8be712e4SBarry Smith         aj = matB->j + ii[ix];
568*8be712e4SBarry Smith         for (int jj = 0; jj < n; jj++) {
569*8be712e4SBarry Smith           if ((tt = PetscRealPart(ap[jj])) > threshold) {
570*8be712e4SBarry Smith             if (tt > max_e) max_e = tt;
571*8be712e4SBarry Smith             nEdges0++;
572*8be712e4SBarry Smith             if ((pe = lghost_pe[aj[jj]]) > max_pe) max_pe = pe;
573*8be712e4SBarry Smith           }
574*8be712e4SBarry Smith         }
575*8be712e4SBarry Smith       }
576*8be712e4SBarry Smith       vval = max_e;
577*8be712e4SBarry Smith       PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES));
578*8be712e4SBarry Smith       vval = (PetscScalar)max_pe;
579*8be712e4SBarry Smith       PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
580*8be712e4SBarry Smith       if (iter == 0 && max_e <= MY_MEPS) { // add BCs to fake aggregate
581*8be712e4SBarry Smith         lid_matched[lid] = PETSC_TRUE;
582*8be712e4SBarry Smith         if (bc_agg == -1) {
583*8be712e4SBarry Smith           bc_agg = lid;
584*8be712e4SBarry Smith           PetscCall(PetscCDCreate(1, &bc_list));
585*8be712e4SBarry Smith         }
586*8be712e4SBarry Smith         PetscCall(PetscCDRemoveAllAt(agg_llists, lid));
587*8be712e4SBarry Smith         PetscCall(PetscCDAppendID(bc_list, 0, my0 + lid));
588*8be712e4SBarry Smith       }
589*8be712e4SBarry Smith     }
590*8be712e4SBarry Smith     PetscCall(VecAssemblyBegin(locMaxEdge));
591*8be712e4SBarry Smith     PetscCall(VecAssemblyEnd(locMaxEdge));
592*8be712e4SBarry Smith     PetscCall(VecAssemblyBegin(locMaxPE));
593*8be712e4SBarry Smith     PetscCall(VecAssemblyEnd(locMaxPE));
594*8be712e4SBarry Smith     /* make 'ghostMaxEdge_max_ew', 'lghost_max_pe' */
595*8be712e4SBarry Smith     if (mpimat) {
596*8be712e4SBarry Smith       const PetscScalar *buf;
597*8be712e4SBarry Smith       PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxEdge));
598*8be712e4SBarry Smith       PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
599*8be712e4SBarry Smith       PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
600*8be712e4SBarry Smith 
601*8be712e4SBarry Smith       PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxPE));
602*8be712e4SBarry Smith       PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
603*8be712e4SBarry Smith       PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
604*8be712e4SBarry Smith       PetscCall(VecGetArrayRead(ghostMaxPE, &buf));
605*8be712e4SBarry Smith       PetscCall(PetscMalloc1(num_ghosts, &lghost_max_pe));
606*8be712e4SBarry Smith       for (int kk = 0; kk < num_ghosts; kk++) lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
607*8be712e4SBarry Smith       PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf));
608*8be712e4SBarry Smith     }
609*8be712e4SBarry Smith     { // make lid_max_pe
610*8be712e4SBarry Smith       const PetscScalar *buf;
611*8be712e4SBarry Smith       PetscCall(VecGetArrayRead(locMaxPE, &buf));
612*8be712e4SBarry Smith       for (int kk = 0; kk < nloc; kk++) lid_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
613*8be712e4SBarry Smith       PetscCall(VecRestoreArrayRead(locMaxPE, &buf));
614*8be712e4SBarry Smith     }
615*8be712e4SBarry Smith     /* setup sorted list of edges, and make 'Edges' */
616*8be712e4SBarry Smith     PetscCall(PetscMalloc1(nEdges0, &Edges));
617*8be712e4SBarry Smith     nEdges = 0;
618*8be712e4SBarry Smith     for (int kk = 0, n; kk < nloc; kk++) {
619*8be712e4SBarry Smith       const PetscInt lid = kk;
620*8be712e4SBarry Smith       PetscReal      tt;
621*8be712e4SBarry Smith       ii = matA->i;
622*8be712e4SBarry Smith       n  = ii[lid + 1] - ii[lid];
623*8be712e4SBarry Smith       aj = matA->j + ii[lid];
624*8be712e4SBarry Smith       ap = matA->a + ii[lid];
625*8be712e4SBarry Smith       for (int jj = 0; jj < n; jj++) {
626*8be712e4SBarry Smith         PetscInt lidj = aj[jj];
627*8be712e4SBarry Smith         if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) {
628*8be712e4SBarry Smith           if (lidj > lid) {
629*8be712e4SBarry Smith             Edges[nEdges].lid0       = lid;
630*8be712e4SBarry Smith             Edges[nEdges].gid1       = lidj + my0;
631*8be712e4SBarry Smith             Edges[nEdges].ghost1_idx = -1;
632*8be712e4SBarry Smith             Edges[nEdges].weight     = tt;
633*8be712e4SBarry Smith             nEdges++;
634*8be712e4SBarry Smith           }
635*8be712e4SBarry Smith         }
636*8be712e4SBarry Smith       }
637*8be712e4SBarry Smith       if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbor */
638*8be712e4SBarry Smith         ii = matB->compressedrow.i;
639*8be712e4SBarry Smith         n  = ii[ix + 1] - ii[ix];
640*8be712e4SBarry Smith         ap = matB->a + ii[ix];
641*8be712e4SBarry Smith         aj = matB->j + ii[ix];
642*8be712e4SBarry Smith         for (int jj = 0; jj < n; jj++) {
643*8be712e4SBarry Smith           if ((tt = PetscRealPart(ap[jj])) > threshold) {
644*8be712e4SBarry Smith             Edges[nEdges].lid0       = lid;
645*8be712e4SBarry Smith             Edges[nEdges].gid1       = lghost_gid[aj[jj]];
646*8be712e4SBarry Smith             Edges[nEdges].ghost1_idx = aj[jj];
647*8be712e4SBarry Smith             Edges[nEdges].weight     = tt;
648*8be712e4SBarry Smith             nEdges++;
649*8be712e4SBarry Smith           }
650*8be712e4SBarry Smith         }
651*8be712e4SBarry Smith       }
652*8be712e4SBarry Smith     }
653*8be712e4SBarry Smith     PetscCheck(nEdges == nEdges0, PETSC_COMM_SELF, PETSC_ERR_SUP, "nEdges != nEdges0: %d %d", (int)nEdges0, (int)nEdges);
654*8be712e4SBarry Smith     qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);
655*8be712e4SBarry Smith 
656*8be712e4SBarry Smith     PetscCall(PetscInfo(info_is, "[%d] start HEM iteration %d with number edges=%d\n", rank, iter, (int)nEdges));
657*8be712e4SBarry Smith 
658*8be712e4SBarry Smith     /* projection matrix */
659*8be712e4SBarry Smith     PetscCall(MatCreate(comm, &P));
660*8be712e4SBarry Smith     PetscCall(MatSetType(P, MATAIJ));
661*8be712e4SBarry Smith     PetscCall(MatSetSizes(P, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
662*8be712e4SBarry Smith     PetscCall(MatMPIAIJSetPreallocation(P, 1, NULL, 1, NULL));
663*8be712e4SBarry Smith     PetscCall(MatSeqAIJSetPreallocation(P, 1, NULL));
664*8be712e4SBarry Smith     PetscCall(MatSetUp(P));
665*8be712e4SBarry Smith     /* process - communicate - process */
666*8be712e4SBarry Smith     for (int sub_it = 0; /* sub_it < n_sub_its */; /* sub_it++ */) {
667*8be712e4SBarry Smith       PetscInt    nactive_edges = 0, n_act_n[3], gn_act_n[3];
668*8be712e4SBarry Smith       PetscMPIInt tag1, tag2;
669*8be712e4SBarry Smith       PetscCall(VecGetArrayRead(locMaxEdge, &lid_max_ew));
670*8be712e4SBarry Smith       if (isMPI) {
671*8be712e4SBarry Smith         PetscCall(VecGetArrayRead(ghostMaxEdge, &lghost_max_ew));
672*8be712e4SBarry Smith         PetscCall(PetscCommGetNewTag(comm, &tag1));
673*8be712e4SBarry Smith         PetscCall(PetscCommGetNewTag(comm, &tag2));
674*8be712e4SBarry Smith       }
675*8be712e4SBarry Smith       for (int kk = 0; kk < nEdges; kk++) {
676*8be712e4SBarry Smith         /* HEM */
677*8be712e4SBarry Smith         const Edge    *e    = &Edges[kk];
678*8be712e4SBarry Smith         const PetscInt lid0 = e->lid0, gid1 = e->gid1, ghost1_idx = e->ghost1_idx, gid0 = lid0 + my0, lid1 = gid1 - my0;
679*8be712e4SBarry Smith         PetscBool      isOK = PETSC_TRUE, print = PETSC_FALSE;
680*8be712e4SBarry Smith         if (print) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t[%d] edge (%d %d), %d %d %d\n", rank, (int)gid0, (int)gid1, lid_matched[lid0], (ghost1_idx != -1 && lghost_matched[ghost1_idx]), (ghost1_idx == -1 && lid_matched[lid1])));
681*8be712e4SBarry Smith         /* skip if either vertex is matched already */
682*8be712e4SBarry Smith         if (lid_matched[lid0] || (ghost1_idx != -1 && lghost_matched[ghost1_idx]) || (ghost1_idx == -1 && lid_matched[lid1])) continue;
683*8be712e4SBarry Smith 
684*8be712e4SBarry Smith         nactive_edges++;
685*8be712e4SBarry Smith         PetscCheck(PetscRealPart(lid_max_ew[lid0]) >= e->weight - MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)e->weight, (double)PetscRealPart(lid_max_ew[lid0]));
686*8be712e4SBarry Smith         if (print) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t[%d] active edge (%d %d), diff0 = %10.4e\n", rank, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight)));
687*8be712e4SBarry Smith         // smaller edge, lid_max_ew get updated - e0
688*8be712e4SBarry Smith         if (PetscRealPart(lid_max_ew[lid0]) > e->weight + MY_MEPS) {
689*8be712e4SBarry Smith           if (print)
690*8be712e4SBarry Smith             PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t[%d] 1) e0 SKIPPING small edge %20.14e edge (%d %d), diff = %10.4e to proc %d. max = %20.14e, w = %20.14e\n", rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - e->weight), ghost1_idx != -1 ? (int)lghost_pe[ghost1_idx] : rank, (double)PetscRealPart(lid_max_ew[lid0]),
691*8be712e4SBarry Smith                                               (double)e->weight));
692*8be712e4SBarry Smith           continue; // we are basically filter edges here
693*8be712e4SBarry Smith         }
694*8be712e4SBarry Smith         // e1 - local
695*8be712e4SBarry Smith         if (ghost1_idx == -1) {
696*8be712e4SBarry Smith           if (PetscRealPart(lid_max_ew[lid1]) > e->weight + MY_MEPS) {
697*8be712e4SBarry Smith             if (print)
698*8be712e4SBarry Smith               PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t%c[%d] 2) e1 SKIPPING small local edge %20.14e edge (%d %d), diff = %10.4e\n", ghost1_idx != -1 ? '\t' : ' ', rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid1]) - e->weight)));
699*8be712e4SBarry Smith             continue; // we are basically filter edges here
700*8be712e4SBarry Smith           }
701*8be712e4SBarry Smith         } else { // e1 - ghost
702*8be712e4SBarry Smith           /* see if edge might get matched on other proc */
703*8be712e4SBarry Smith           PetscReal g_max_e1 = PetscRealPart(lghost_max_ew[ghost1_idx]);
704*8be712e4SBarry Smith           if (print)
705*8be712e4SBarry Smith             PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t\t[%d] CHECK GHOST e1, edge (%d %d), E0 MAX EDGE WEIGHT = %10.4e, EDGE WEIGHT = %10.4e, diff1 = %10.4e, ghost proc %d with max pe %d on e0 and %d on e1\n", rank, (int)gid0, (int)gid1, (double)PetscRealPart(lid_max_ew[lid0]),
706*8be712e4SBarry Smith                                               (double)e->weight, (double)(PetscRealPart(lghost_max_ew[ghost1_idx]) - e->weight), (int)lghost_pe[ghost1_idx], lid_max_pe[lid0], lghost_max_pe[ghost1_idx]));
707*8be712e4SBarry Smith           if (g_max_e1 > e->weight + MY_MEPS) {
708*8be712e4SBarry Smith             /* PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"\t\t\t\t[%d] 3) ghost e1 SKIPPING small edge (%d %d), diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */
709*8be712e4SBarry Smith             continue;
710*8be712e4SBarry Smith           } else if (g_max_e1 >= e->weight - MY_MEPS && lghost_max_pe[ghost1_idx] > rank) {
711*8be712e4SBarry Smith             /* check for max_ea == to this edge and larger processor that will deal with this */
712*8be712e4SBarry Smith             if (print)
713*8be712e4SBarry Smith               PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t\t[%d] ghost e1 SKIPPING EQUAL (%d %d), diff = %10.4e from larger proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1,
714*8be712e4SBarry Smith                                                 (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight), (int)lghost_pe[ghost1_idx], (int)lghost_max_pe[ghost1_idx], (double)g_max_e1, (double)e->weight));
715*8be712e4SBarry Smith             isOK = PETSC_FALSE; // this guy could delete me
716*8be712e4SBarry Smith             continue;
717*8be712e4SBarry Smith           } else {
718*8be712e4SBarry Smith             /* PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"\t[%d] Edge (%d %d) passes gid0 tests, diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */
719*8be712e4SBarry Smith           }
720*8be712e4SBarry Smith         }
721*8be712e4SBarry Smith         /* check ghost for v0 */
722*8be712e4SBarry Smith         if (isOK) {
723*8be712e4SBarry Smith           PetscReal max_e, ew;
724*8be712e4SBarry Smith           if ((ix = lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
725*8be712e4SBarry Smith             int n;
726*8be712e4SBarry Smith             ii = matB->compressedrow.i;
727*8be712e4SBarry Smith             n  = ii[ix + 1] - ii[ix];
728*8be712e4SBarry Smith             ap = matB->a + ii[ix];
729*8be712e4SBarry Smith             aj = matB->j + ii[ix];
730*8be712e4SBarry Smith             for (int jj = 0; jj < n && isOK; jj++) {
731*8be712e4SBarry Smith               PetscInt lidj = aj[jj];
732*8be712e4SBarry Smith               if (lghost_matched[lidj]) continue;
733*8be712e4SBarry Smith               ew = PetscRealPart(ap[jj]);
734*8be712e4SBarry Smith               if (ew <= threshold) continue;
735*8be712e4SBarry Smith               max_e = PetscRealPart(lghost_max_ew[lidj]);
736*8be712e4SBarry Smith               /* check for max_e == to this edge and larger processor that will deal with this */
737*8be712e4SBarry Smith               if (ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS && lghost_pe[lidj] > rank) isOK = PETSC_FALSE;
738*8be712e4SBarry Smith               PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)PetscRealPart(ew), (double)PetscRealPart(max_e));
739*8be712e4SBarry Smith               if (print)
740*8be712e4SBarry Smith                 PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t\t\t[%d] e0: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d). isOK = %d, %d %d %d; ew = %e, lid0 max ew = %e, diff = %e, eps = %e\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj], isOK, (double)(ew) >= (double)(max_e - MY_MEPS), ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS, lghost_pe[lidj] > rank, (double)ew, (double)PetscRealPart(lid_max_ew[lid0]), (double)(ew - PetscRealPart(lid_max_ew[lid0])), (double)MY_MEPS));
741*8be712e4SBarry Smith             }
742*8be712e4SBarry Smith             if (!isOK && print) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1));
743*8be712e4SBarry Smith           }
744*8be712e4SBarry Smith           /* check local v1 */
745*8be712e4SBarry Smith           if (ghost1_idx == -1) {
746*8be712e4SBarry Smith             if ((ix = lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
747*8be712e4SBarry Smith               int n;
748*8be712e4SBarry Smith               ii = matB->compressedrow.i;
749*8be712e4SBarry Smith               n  = ii[ix + 1] - ii[ix];
750*8be712e4SBarry Smith               ap = matB->a + ii[ix];
751*8be712e4SBarry Smith               aj = matB->j + ii[ix];
752*8be712e4SBarry Smith               for (int jj = 0; jj < n && isOK; jj++) {
753*8be712e4SBarry Smith                 PetscInt lidj = aj[jj];
754*8be712e4SBarry Smith                 if (lghost_matched[lidj]) continue;
755*8be712e4SBarry Smith                 ew = PetscRealPart(ap[jj]);
756*8be712e4SBarry Smith                 if (ew <= threshold) continue;
757*8be712e4SBarry Smith                 max_e = PetscRealPart(lghost_max_ew[lidj]);
758*8be712e4SBarry Smith                 /* check for max_e == to this edge and larger processor that will deal with this */
759*8be712e4SBarry Smith                 if (ew >= PetscRealPart(lid_max_ew[lid1]) - MY_MEPS && lghost_pe[lidj] > rank) isOK = PETSC_FALSE;
760*8be712e4SBarry Smith                 PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)PetscRealPart(ew), (double)PetscRealPart(max_e));
761*8be712e4SBarry Smith                 if (print)
762*8be712e4SBarry Smith                   PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t\t\t\t[%d] e1: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d)\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj]));
763*8be712e4SBarry Smith               }
764*8be712e4SBarry Smith             }
765*8be712e4SBarry Smith             if (!isOK && print) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1));
766*8be712e4SBarry Smith           }
767*8be712e4SBarry Smith         }
768*8be712e4SBarry Smith         PetscReal e1_max_w = (ghost1_idx == -1 ? PetscRealPart(lid_max_ew[lid0]) : PetscRealPart(lghost_max_ew[ghost1_idx]));
769*8be712e4SBarry Smith         if (print)
770*8be712e4SBarry Smith           PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t[%d] MATCHING (%d %d) e1 max weight = %e, e1 wight diff %e, %s. isOK = %d\n", rank, (int)gid0, (int)gid1, (double)e1_max_w, (double)(e1_max_w - e->weight), ghost1_idx == -1 ? "local" : "ghost", isOK));
771*8be712e4SBarry Smith         /* do it */
772*8be712e4SBarry Smith         if (isOK) {
773*8be712e4SBarry Smith           if (ghost1_idx == -1) {
774*8be712e4SBarry Smith             PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_SUP, "local %d is matched", (int)gid1);
775*8be712e4SBarry Smith             lid_matched[lid1] = PETSC_TRUE;                       /* keep track of what we've done this round */
776*8be712e4SBarry Smith             PetscCall(PetscCDMoveAppend(agg_llists, lid0, lid1)); // takes lid1's list and appends to lid0's
777*8be712e4SBarry Smith           } else {
778*8be712e4SBarry Smith             /* add gid1 to list of ghost deleted by me -- I need their children */
779*8be712e4SBarry Smith             PetscMPIInt proc = lghost_pe[ghost1_idx];
780*8be712e4SBarry Smith             PetscCheck(!lghost_matched[ghost1_idx], PETSC_COMM_SELF, PETSC_ERR_SUP, "ghost %d is matched", (int)lghost_gid[ghost1_idx]);
781*8be712e4SBarry Smith             lghost_matched[ghost1_idx] = PETSC_TRUE;
782*8be712e4SBarry Smith             PetscCall(PetscCDAppendID(ghost_deleted_list, proc, ghost1_idx)); /* cache to send messages */
783*8be712e4SBarry Smith             PetscCall(PetscCDAppendID(ghost_deleted_list, proc, lid0));
784*8be712e4SBarry Smith           }
785*8be712e4SBarry Smith           lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
786*8be712e4SBarry Smith           /* set projection */
787*8be712e4SBarry Smith           PetscCall(MatSetValues(P, 1, &gid0, 1, &gid0, &one, INSERT_VALUES));
788*8be712e4SBarry Smith           PetscCall(MatSetValues(P, 1, &gid1, 1, &gid0, &one, INSERT_VALUES));
789*8be712e4SBarry Smith           //PetscCall(PetscPrintf(PETSC_COMM_WORLD,"\t %d.%d) match active EDGE %d : (%d %d)\n",iter,sub_it, (int)nactive_edges, (int)gid0, (int)gid1));
790*8be712e4SBarry Smith         } /* matched */
791*8be712e4SBarry Smith       }   /* edge loop */
792*8be712e4SBarry Smith       PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT));
793*8be712e4SBarry Smith       if (isMPI) PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew));
794*8be712e4SBarry Smith       PetscCall(VecRestoreArrayRead(locMaxEdge, &lid_max_ew));
795*8be712e4SBarry Smith       // count active for test, latter, update deleted ghosts
796*8be712e4SBarry Smith       n_act_n[0] = nactive_edges;
797*8be712e4SBarry Smith       if (ghost_deleted_list) PetscCall(PetscCDCount(ghost_deleted_list, &n_act_n[2]));
798*8be712e4SBarry Smith       else n_act_n[2] = 0;
799*8be712e4SBarry Smith       PetscCall(PetscCDCount(agg_llists, &n_act_n[1]));
800*8be712e4SBarry Smith       PetscCall(MPIU_Allreduce(n_act_n, gn_act_n, 3, MPIU_INT, MPI_SUM, comm));
801*8be712e4SBarry Smith       PetscCall(PetscInfo(info_is, "[%d] %d.%d) nactive edges=%" PetscInt_FMT ", ncomm_procs=%d, nEdges=%d, %" PetscInt_FMT " deleted ghosts, N=%" PetscInt_FMT "\n", rank, iter, sub_it, gn_act_n[0], (int)ncomm_procs, (int)nEdges, gn_act_n[2], gn_act_n[1]));
802*8be712e4SBarry Smith       /* deal with deleted ghost */
803*8be712e4SBarry Smith       if (isMPI) {
804*8be712e4SBarry Smith         PetscCDIntNd *pos;
805*8be712e4SBarry Smith         PetscInt     *sbuffs1[REQ_BF_SIZE], ndel;
806*8be712e4SBarry Smith         PetscInt     *sbuffs2[REQ_BF_SIZE];
807*8be712e4SBarry Smith         MPI_Status    status;
808*8be712e4SBarry Smith 
809*8be712e4SBarry Smith         /* send deleted ghosts */
810*8be712e4SBarry Smith         for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
811*8be712e4SBarry Smith           const PetscMPIInt proc = comm_procs[proc_idx];
812*8be712e4SBarry Smith           PetscInt         *sbuff, *pt, scount;
813*8be712e4SBarry Smith           MPI_Request      *request;
814*8be712e4SBarry Smith           /* count ghosts */
815*8be712e4SBarry Smith           PetscCall(PetscCDCountAt(ghost_deleted_list, proc, &ndel));
816*8be712e4SBarry Smith           ndel /= 2; // two entries for each proc
817*8be712e4SBarry Smith           scount = 2 + 2 * ndel;
818*8be712e4SBarry Smith           PetscCall(PetscMalloc1(scount + request_size, &sbuff));
819*8be712e4SBarry Smith           /* save requests */
820*8be712e4SBarry Smith           sbuffs1[proc_idx] = sbuff;
821*8be712e4SBarry Smith           request           = (MPI_Request *)sbuff;
822*8be712e4SBarry Smith           sbuff = pt = (PetscInt *)(sbuff + request_size);
823*8be712e4SBarry Smith           /* write [ndel, proc, n*[gid1,gid0] */
824*8be712e4SBarry Smith           *pt++ = ndel; // number of deleted to send
825*8be712e4SBarry Smith           *pt++ = rank; // proc (not used)
826*8be712e4SBarry Smith           PetscCall(PetscCDGetHeadPos(ghost_deleted_list, proc, &pos));
827*8be712e4SBarry Smith           while (pos) {
828*8be712e4SBarry Smith             PetscInt lid0, ghost_idx, gid1;
829*8be712e4SBarry Smith             PetscCall(PetscCDIntNdGetID(pos, &ghost_idx));
830*8be712e4SBarry Smith             gid1 = lghost_gid[ghost_idx];
831*8be712e4SBarry Smith             PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos));
832*8be712e4SBarry Smith             PetscCall(PetscCDIntNdGetID(pos, &lid0));
833*8be712e4SBarry Smith             PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos));
834*8be712e4SBarry Smith             *pt++ = gid1;
835*8be712e4SBarry Smith             *pt++ = lid0 + my0; // gid0
836*8be712e4SBarry Smith           }
837*8be712e4SBarry Smith           PetscCheck(pt - sbuff == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "sbuff-pt != scount: %d", (int)(pt - sbuff));
838*8be712e4SBarry Smith           /* MPI_Isend:  tag1 [ndel, proc, n*[gid1,gid0] ] */
839*8be712e4SBarry Smith           PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag1, comm, request));
840*8be712e4SBarry Smith           PetscCall(PetscCDRemoveAllAt(ghost_deleted_list, proc)); // done with this list
841*8be712e4SBarry Smith         }
842*8be712e4SBarry Smith         /* receive deleted, send back partial aggregates, clear lists */
843*8be712e4SBarry Smith         for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
844*8be712e4SBarry Smith           PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag1, comm, &status));
845*8be712e4SBarry Smith           {
846*8be712e4SBarry Smith             PetscInt         *pt, *pt2, *pt3, *sbuff, tmp;
847*8be712e4SBarry Smith             MPI_Request      *request;
848*8be712e4SBarry Smith             int               rcount, scount, ndel;
849*8be712e4SBarry Smith             const PetscMPIInt proc = status.MPI_SOURCE;
850*8be712e4SBarry Smith             PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount));
851*8be712e4SBarry Smith             if (rcount > rbuff_sz) {
852*8be712e4SBarry Smith               if (rbuff) PetscCall(PetscFree(rbuff));
853*8be712e4SBarry Smith               PetscCall(PetscMalloc1(rcount, &rbuff));
854*8be712e4SBarry Smith               rbuff_sz = rcount;
855*8be712e4SBarry Smith             }
856*8be712e4SBarry Smith             /* MPI_Recv: tag1 [ndel, proc, ndel*[gid1,gid0] ] */
857*8be712e4SBarry Smith             PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag1, comm, &status));
858*8be712e4SBarry Smith             /* read and count sends *[lid0, n, n*[gid] ] */
859*8be712e4SBarry Smith             pt     = rbuff;
860*8be712e4SBarry Smith             scount = 0;
861*8be712e4SBarry Smith             ndel   = *pt++; // number of deleted to recv
862*8be712e4SBarry Smith             tmp    = *pt++; // proc (not used)
863*8be712e4SBarry Smith             while (ndel--) {
864*8be712e4SBarry Smith               PetscInt gid1 = *pt++, lid1 = gid1 - my0;
865*8be712e4SBarry Smith               int      gh_gid0 = *pt++; // gid on other proc (not used here to count)
866*8be712e4SBarry Smith               PetscCheck(lid1 >= 0 && lid1 < nloc, PETSC_COMM_SELF, PETSC_ERR_SUP, "received ghost deleted %d", (int)gid1);
867*8be712e4SBarry Smith               PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "%d) received matched local gid %" PetscInt_FMT ",%d, with ghost (lid) %" PetscInt_FMT " from proc %d", sub_it, gid1, gh_gid0, tmp, proc);
868*8be712e4SBarry Smith               lid_matched[lid1] = PETSC_TRUE;                    /* keep track of what we've done this round */
869*8be712e4SBarry Smith               PetscCall(PetscCDCountAt(agg_llists, lid1, &tmp)); // n
870*8be712e4SBarry Smith               /* PetscCheck(tmp == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "sending %d (!= 1) size aggregate. gid-0 %d, from %d (gid-1 %d)", (int)tmp, (int) gid, proc, gh_gid0); */
871*8be712e4SBarry Smith               scount += tmp + 2; // lid0, n, n*[gid]
872*8be712e4SBarry Smith             }
873*8be712e4SBarry Smith             PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "receive buffer size != num read: %d; rcount: %d", (int)(pt - rbuff), rcount);
874*8be712e4SBarry Smith             /* send tag2: *[gid0, n, n*[gid] ] */
875*8be712e4SBarry Smith             PetscCall(PetscMalloc1(scount + request_size, &sbuff));
876*8be712e4SBarry Smith             sbuffs2[proc_idx] = sbuff; /* cache request */
877*8be712e4SBarry Smith             request           = (MPI_Request *)sbuff;
878*8be712e4SBarry Smith             pt2 = sbuff = sbuff + request_size;
879*8be712e4SBarry Smith             // read again: n, proc, n*[gid1,gid0]
880*8be712e4SBarry Smith             pt   = rbuff;
881*8be712e4SBarry Smith             ndel = *pt++;
882*8be712e4SBarry Smith             tmp  = *pt++; // proc (not used)
883*8be712e4SBarry Smith             while (ndel--) {
884*8be712e4SBarry Smith               PetscInt gid1 = *pt++, lid1 = gid1 - my0, gh_gid0 = *pt++;
885*8be712e4SBarry Smith               /* write [gid0, aggSz, aggSz[gid] ] */
886*8be712e4SBarry Smith               *pt2++ = gh_gid0;
887*8be712e4SBarry Smith               pt3    = pt2++; /* save pointer for later */
888*8be712e4SBarry Smith               PetscCall(PetscCDGetHeadPos(agg_llists, lid1, &pos));
889*8be712e4SBarry Smith               while (pos) {
890*8be712e4SBarry Smith                 PetscInt gid;
891*8be712e4SBarry Smith                 PetscCall(PetscCDIntNdGetID(pos, &gid));
892*8be712e4SBarry Smith                 PetscCall(PetscCDGetNextPos(agg_llists, lid1, &pos));
893*8be712e4SBarry Smith                 *pt2++ = gid;
894*8be712e4SBarry Smith               }
895*8be712e4SBarry Smith               *pt3 = (pt2 - pt3) - 1;
896*8be712e4SBarry Smith               /* clear list */
897*8be712e4SBarry Smith               PetscCall(PetscCDRemoveAllAt(agg_llists, lid1));
898*8be712e4SBarry Smith             }
899*8be712e4SBarry Smith             PetscCheck((pt2 - sbuff) == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "buffer size != num write: %d %d", (int)(pt2 - sbuff), (int)scount);
900*8be712e4SBarry Smith             /* MPI_Isend: requested data tag2 *[lid0, n, n*[gid1] ] */
901*8be712e4SBarry Smith             PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag2, comm, request));
902*8be712e4SBarry Smith           }
903*8be712e4SBarry Smith         } // proc_idx
904*8be712e4SBarry Smith         /* receive tag2 *[gid0, n, n*[gid] ] */
905*8be712e4SBarry Smith         for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
906*8be712e4SBarry Smith           PetscMPIInt proc;
907*8be712e4SBarry Smith           PetscInt   *pt;
908*8be712e4SBarry Smith           int         rcount;
909*8be712e4SBarry Smith           PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag2, comm, &status));
910*8be712e4SBarry Smith           PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount));
911*8be712e4SBarry Smith           if (rcount > rbuff_sz) {
912*8be712e4SBarry Smith             if (rbuff) PetscCall(PetscFree(rbuff));
913*8be712e4SBarry Smith             PetscCall(PetscMalloc1(rcount, &rbuff));
914*8be712e4SBarry Smith             rbuff_sz = rcount;
915*8be712e4SBarry Smith           }
916*8be712e4SBarry Smith           proc = status.MPI_SOURCE;
917*8be712e4SBarry Smith           /* MPI_Recv:  tag1 [n, proc, n*[gid1,lid0] ] */
918*8be712e4SBarry Smith           PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag2, comm, &status));
919*8be712e4SBarry Smith           pt = rbuff;
920*8be712e4SBarry Smith           while (pt - rbuff < rcount) {
921*8be712e4SBarry Smith             PetscInt gid0 = *pt++, n = *pt++;
922*8be712e4SBarry Smith             while (n--) {
923*8be712e4SBarry Smith               PetscInt gid1 = *pt++;
924*8be712e4SBarry Smith               PetscCall(PetscCDAppendID(agg_llists, gid0 - my0, gid1));
925*8be712e4SBarry Smith             }
926*8be712e4SBarry Smith           }
927*8be712e4SBarry Smith           PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "recv buffer size != num read: %d %d", (int)(pt - rbuff), (int)rcount);
928*8be712e4SBarry Smith         }
929*8be712e4SBarry Smith         /* wait for tag1 isends */
930*8be712e4SBarry Smith         for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
931*8be712e4SBarry Smith           MPI_Request *request;
932*8be712e4SBarry Smith           request = (MPI_Request *)sbuffs1[proc_idx];
933*8be712e4SBarry Smith           PetscCallMPI(MPI_Wait(request, &status));
934*8be712e4SBarry Smith           PetscCall(PetscFree(sbuffs1[proc_idx]));
935*8be712e4SBarry Smith         }
936*8be712e4SBarry Smith         /* wait for tag2 isends */
937*8be712e4SBarry Smith         for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
938*8be712e4SBarry Smith           MPI_Request *request = (MPI_Request *)sbuffs2[proc_idx];
939*8be712e4SBarry Smith           PetscCallMPI(MPI_Wait(request, &status));
940*8be712e4SBarry Smith           PetscCall(PetscFree(sbuffs2[proc_idx]));
941*8be712e4SBarry Smith         }
942*8be712e4SBarry Smith       } /* MPI */
943*8be712e4SBarry Smith       /* set 'lghost_matched' - use locMaxEdge, ghostMaxEdge (recomputed next) */
944*8be712e4SBarry Smith       if (isMPI) {
945*8be712e4SBarry Smith         const PetscScalar *sbuff;
946*8be712e4SBarry Smith         for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
947*8be712e4SBarry Smith           PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
948*8be712e4SBarry Smith           PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
949*8be712e4SBarry Smith         }
950*8be712e4SBarry Smith         PetscCall(VecAssemblyBegin(locMaxEdge));
951*8be712e4SBarry Smith         PetscCall(VecAssemblyEnd(locMaxEdge));
952*8be712e4SBarry Smith         PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
953*8be712e4SBarry Smith         PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
954*8be712e4SBarry Smith         PetscCall(VecGetArrayRead(ghostMaxEdge, &sbuff));
955*8be712e4SBarry Smith         for (int kk = 0; kk < num_ghosts; kk++) { lghost_matched[kk] = (PetscBool)(PetscRealPart(sbuff[kk]) != 0.0); }
956*8be712e4SBarry Smith         PetscCall(VecRestoreArrayRead(ghostMaxEdge, &sbuff));
957*8be712e4SBarry Smith       }
958*8be712e4SBarry Smith       /* compute 'locMaxEdge' inside sub iteration b/c max weight can drop as neighbors are matched */
959*8be712e4SBarry Smith       for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
960*8be712e4SBarry Smith         PetscReal      max_e = 0., tt;
961*8be712e4SBarry Smith         PetscScalar    vval;
962*8be712e4SBarry Smith         const PetscInt lid    = kk;
963*8be712e4SBarry Smith         int            max_pe = rank, pe, n;
964*8be712e4SBarry Smith         ii                    = matA->i;
965*8be712e4SBarry Smith         n                     = ii[lid + 1] - ii[lid];
966*8be712e4SBarry Smith         aj                    = matA->j + ii[lid];
967*8be712e4SBarry Smith         ap                    = matA->a + ii[lid];
968*8be712e4SBarry Smith         for (int jj = 0; jj < n; jj++) {
969*8be712e4SBarry Smith           PetscInt lidj = aj[jj];
970*8be712e4SBarry Smith           if (lid_matched[lidj]) continue; /* this is new - can change local max */
971*8be712e4SBarry Smith           if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
972*8be712e4SBarry Smith         }
973*8be712e4SBarry Smith         if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
974*8be712e4SBarry Smith           ii = matB->compressedrow.i;
975*8be712e4SBarry Smith           n  = ii[ix + 1] - ii[ix];
976*8be712e4SBarry Smith           ap = matB->a + ii[ix];
977*8be712e4SBarry Smith           aj = matB->j + ii[ix];
978*8be712e4SBarry Smith           for (int jj = 0; jj < n; jj++) {
979*8be712e4SBarry Smith             PetscInt lidj = aj[jj];
980*8be712e4SBarry Smith             if (lghost_matched[lidj]) continue;
981*8be712e4SBarry Smith             if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt;
982*8be712e4SBarry Smith           }
983*8be712e4SBarry Smith         }
984*8be712e4SBarry Smith         vval = (PetscScalar)max_e;
985*8be712e4SBarry Smith         PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
986*8be712e4SBarry Smith         // max PE with max edge
987*8be712e4SBarry Smith         if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
988*8be712e4SBarry Smith           ii = matB->compressedrow.i;
989*8be712e4SBarry Smith           n  = ii[ix + 1] - ii[ix];
990*8be712e4SBarry Smith           ap = matB->a + ii[ix];
991*8be712e4SBarry Smith           aj = matB->j + ii[ix];
992*8be712e4SBarry Smith           for (int jj = 0; jj < n; jj++) {
993*8be712e4SBarry Smith             PetscInt lidj = aj[jj];
994*8be712e4SBarry Smith             if (lghost_matched[lidj]) continue;
995*8be712e4SBarry Smith             if ((pe = lghost_pe[aj[jj]]) > max_pe && PetscRealPart(ap[jj]) >= max_e - MY_MEPS) { max_pe = pe; }
996*8be712e4SBarry Smith           }
997*8be712e4SBarry Smith         }
998*8be712e4SBarry Smith         vval = (PetscScalar)max_pe;
999*8be712e4SBarry Smith         PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
1000*8be712e4SBarry Smith       }
1001*8be712e4SBarry Smith       PetscCall(VecAssemblyBegin(locMaxEdge));
1002*8be712e4SBarry Smith       PetscCall(VecAssemblyEnd(locMaxEdge));
1003*8be712e4SBarry Smith       PetscCall(VecAssemblyBegin(locMaxPE));
1004*8be712e4SBarry Smith       PetscCall(VecAssemblyEnd(locMaxPE));
1005*8be712e4SBarry Smith       /* compute 'lghost_max_ew' and 'lghost_max_pe' to get ready for next iteration*/
1006*8be712e4SBarry Smith       if (isMPI) {
1007*8be712e4SBarry Smith         const PetscScalar *buf;
1008*8be712e4SBarry Smith         PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
1009*8be712e4SBarry Smith         PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
1010*8be712e4SBarry Smith         PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
1011*8be712e4SBarry Smith         PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
1012*8be712e4SBarry Smith         PetscCall(VecGetArrayRead(ghostMaxPE, &buf));
1013*8be712e4SBarry Smith         for (int kk = 0; kk < num_ghosts; kk++) {
1014*8be712e4SBarry Smith           lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
1015*8be712e4SBarry Smith         }
1016*8be712e4SBarry Smith         PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf));
1017*8be712e4SBarry Smith       }
1018*8be712e4SBarry Smith       // if no active edges, stop
1019*8be712e4SBarry Smith       if (gn_act_n[0] < 1) break;
1020*8be712e4SBarry Smith       // inc and check (self stopping iteration
1021*8be712e4SBarry Smith       sub_it++;
1022*8be712e4SBarry Smith       PetscCheck(sub_it < n_sub_its, PETSC_COMM_SELF, PETSC_ERR_SUP, "failed to finish HEM step %d/%d", sub_it + 1, n_sub_its);
1023*8be712e4SBarry Smith     } /* sub_it loop */
1024*8be712e4SBarry Smith     /* clean up iteration */
1025*8be712e4SBarry Smith     PetscCall(PetscFree(Edges));
1026*8be712e4SBarry Smith     if (mpimat) { // can be hoisted
1027*8be712e4SBarry Smith       PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew));
1028*8be712e4SBarry Smith       PetscCall(VecDestroy(&ghostMaxEdge));
1029*8be712e4SBarry Smith       PetscCall(VecDestroy(&ghostMaxPE));
1030*8be712e4SBarry Smith       PetscCall(PetscFree(lghost_pe));
1031*8be712e4SBarry Smith       PetscCall(PetscFree(lghost_gid));
1032*8be712e4SBarry Smith       PetscCall(PetscFree(lghost_matched));
1033*8be712e4SBarry Smith       PetscCall(PetscFree(lghost_max_pe));
1034*8be712e4SBarry Smith     }
1035*8be712e4SBarry Smith     PetscCall(VecDestroy(&locMaxEdge));
1036*8be712e4SBarry Smith     PetscCall(VecDestroy(&locMaxPE));
1037*8be712e4SBarry Smith     /* create next graph */
1038*8be712e4SBarry Smith     {
1039*8be712e4SBarry Smith       Vec diag;
1040*8be712e4SBarry Smith       /* add identity for unmatched vertices so they stay alive */
1041*8be712e4SBarry Smith       for (PetscInt kk = 0, gid1, gid = my0; kk < nloc; kk++, gid++) {
1042*8be712e4SBarry Smith         if (!lid_matched[kk]) {
1043*8be712e4SBarry Smith           const PetscInt lid = kk;
1044*8be712e4SBarry Smith           PetscCDIntNd  *pos;
1045*8be712e4SBarry Smith           PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
1046*8be712e4SBarry Smith           PetscCheck(pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "empty list in singleton: %d", (int)gid);
1047*8be712e4SBarry Smith           PetscCall(PetscCDIntNdGetID(pos, &gid1));
1048*8be712e4SBarry Smith           PetscCheck(gid1 == gid, PETSC_COMM_SELF, PETSC_ERR_PLIB, "first in list (%d) in singleton not %d", (int)gid1, (int)gid);
1049*8be712e4SBarry Smith           PetscCall(MatSetValues(P, 1, &gid, 1, &gid, &one, INSERT_VALUES));
1050*8be712e4SBarry Smith         }
1051*8be712e4SBarry Smith       }
1052*8be712e4SBarry Smith       PetscCall(MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY));
1053*8be712e4SBarry Smith       PetscCall(MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY));
1054*8be712e4SBarry Smith 
1055*8be712e4SBarry Smith       /* project to make new graph with collapsed edges */
1056*8be712e4SBarry Smith       PetscCall(MatPtAP(cMat, P, MAT_INITIAL_MATRIX, 1.0, &tMat));
1057*8be712e4SBarry Smith       PetscCall(MatDestroy(&P));
1058*8be712e4SBarry Smith       PetscCall(MatDestroy(&cMat));
1059*8be712e4SBarry Smith       cMat = tMat;
1060*8be712e4SBarry Smith       PetscCall(MatCreateVecs(cMat, &diag, NULL));
1061*8be712e4SBarry Smith       PetscCall(MatGetDiagonal(cMat, diag)); /* effectively PCJACOBI */
1062*8be712e4SBarry Smith       PetscCall(VecReciprocal(diag));
1063*8be712e4SBarry Smith       PetscCall(VecSqrtAbs(diag));
1064*8be712e4SBarry Smith       PetscCall(MatDiagonalScale(cMat, diag, diag));
1065*8be712e4SBarry Smith       PetscCall(VecDestroy(&diag));
1066*8be712e4SBarry Smith     }
1067*8be712e4SBarry Smith   } /* coarsen iterator */
1068*8be712e4SBarry Smith 
1069*8be712e4SBarry Smith   /* make fake matrix with Mat->B only for smoothed agg QR. Need this if we make an aux graph (ie, PtAP) with k > 1 */
1070*8be712e4SBarry Smith   if (size > 1) {
1071*8be712e4SBarry Smith     Mat           mat;
1072*8be712e4SBarry Smith     PetscCDIntNd *pos;
1073*8be712e4SBarry Smith     PetscInt      NN, MM, jj = 0, mxsz = 0;
1074*8be712e4SBarry Smith 
1075*8be712e4SBarry Smith     for (int kk = 0; kk < nloc; kk++) {
1076*8be712e4SBarry Smith       PetscCall(PetscCDCountAt(agg_llists, kk, &jj));
1077*8be712e4SBarry Smith       if (jj > mxsz) mxsz = jj;
1078*8be712e4SBarry Smith     }
1079*8be712e4SBarry Smith     PetscCall(MatGetSize(a_Gmat, &MM, &NN));
1080*8be712e4SBarry Smith     if (mxsz > MM - nloc) mxsz = MM - nloc;
1081*8be712e4SBarry Smith     /* matrix of ghost adj for square graph */
1082*8be712e4SBarry Smith     PetscCall(MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat));
1083*8be712e4SBarry Smith     for (PetscInt lid = 0, gid = my0; lid < nloc; lid++, gid++) {
1084*8be712e4SBarry Smith       PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
1085*8be712e4SBarry Smith       while (pos) {
1086*8be712e4SBarry Smith         PetscInt gid1;
1087*8be712e4SBarry Smith         PetscCall(PetscCDIntNdGetID(pos, &gid1));
1088*8be712e4SBarry Smith         PetscCall(PetscCDGetNextPos(agg_llists, lid, &pos));
1089*8be712e4SBarry Smith         if (gid1 < my0 || gid1 >= my0 + nloc) PetscCall(MatSetValues(mat, 1, &gid, 1, &gid1, &one, ADD_VALUES));
1090*8be712e4SBarry Smith       }
1091*8be712e4SBarry Smith     }
1092*8be712e4SBarry Smith     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
1093*8be712e4SBarry Smith     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
1094*8be712e4SBarry Smith     PetscCall(PetscCDSetMat(agg_llists, mat));
1095*8be712e4SBarry Smith     PetscCall(PetscCDDestroy(ghost_deleted_list));
1096*8be712e4SBarry Smith     if (rbuff_sz) PetscCall(PetscFree(rbuff)); // always true
1097*8be712e4SBarry Smith   }
1098*8be712e4SBarry Smith   // move BCs into some node
1099*8be712e4SBarry Smith   if (bc_list) {
1100*8be712e4SBarry Smith     PetscCDIntNd *pos;
1101*8be712e4SBarry Smith     PetscCall(PetscCDGetHeadPos(bc_list, 0, &pos));
1102*8be712e4SBarry Smith     while (pos) {
1103*8be712e4SBarry Smith       PetscInt gid1;
1104*8be712e4SBarry Smith       PetscCall(PetscCDIntNdGetID(pos, &gid1));
1105*8be712e4SBarry Smith       PetscCall(PetscCDGetNextPos(bc_list, 0, &pos));
1106*8be712e4SBarry Smith       PetscCall(PetscCDAppendID(agg_llists, bc_agg, gid1));
1107*8be712e4SBarry Smith     }
1108*8be712e4SBarry Smith     PetscCall(PetscCDRemoveAllAt(bc_list, 0));
1109*8be712e4SBarry Smith     PetscCall(PetscCDDestroy(bc_list));
1110*8be712e4SBarry Smith   }
1111*8be712e4SBarry Smith   {
1112*8be712e4SBarry Smith     // check sizes -- all vertices must get in graph
1113*8be712e4SBarry Smith     PetscInt sz, globalsz, MM;
1114*8be712e4SBarry Smith     PetscCall(MatGetSize(a_Gmat, &MM, NULL));
1115*8be712e4SBarry Smith     PetscCall(PetscCDCount(agg_llists, &sz));
1116*8be712e4SBarry Smith     PetscCall(MPIU_Allreduce(&sz, &globalsz, 1, MPIU_INT, MPI_SUM, comm));
1117*8be712e4SBarry Smith     PetscCheck(MM == globalsz, comm, PETSC_ERR_SUP, "lost %d equations ?", (int)(MM - globalsz));
1118*8be712e4SBarry Smith   }
1119*8be712e4SBarry Smith   // cleanup
1120*8be712e4SBarry Smith   PetscCall(MatDestroy(&cMat));
1121*8be712e4SBarry Smith   PetscCall(PetscFree(lid_cprowID));
1122*8be712e4SBarry Smith   PetscCall(PetscFree(lid_max_pe));
1123*8be712e4SBarry Smith   PetscCall(PetscFree(lid_matched));
1124*8be712e4SBarry Smith   PetscCall(ISDestroy(&info_is));
1125*8be712e4SBarry Smith 
1126*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
1127*8be712e4SBarry Smith }
1128*8be712e4SBarry Smith 
1129*8be712e4SBarry Smith /*
1130*8be712e4SBarry Smith    HEM coarsen, simple greedy.
1131*8be712e4SBarry Smith */
1132*8be712e4SBarry Smith static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse)
1133*8be712e4SBarry Smith {
1134*8be712e4SBarry Smith   Mat mat = coarse->graph;
1135*8be712e4SBarry Smith 
1136*8be712e4SBarry Smith   PetscFunctionBegin;
1137*8be712e4SBarry Smith   PetscCall(MatCoarsenApply_HEM_private(mat, coarse->max_it, coarse->threshold, &coarse->agg_lists));
1138*8be712e4SBarry Smith 
1139*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
1140*8be712e4SBarry Smith }
1141*8be712e4SBarry Smith 
1142*8be712e4SBarry Smith static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse, PetscViewer viewer)
1143*8be712e4SBarry Smith {
1144*8be712e4SBarry Smith   PetscMPIInt rank;
1145*8be712e4SBarry Smith   PetscBool   iascii;
1146*8be712e4SBarry Smith 
1147*8be712e4SBarry Smith   PetscFunctionBegin;
1148*8be712e4SBarry Smith   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)coarse), &rank));
1149*8be712e4SBarry Smith   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1150*8be712e4SBarry Smith   if (iascii) {
1151*8be712e4SBarry Smith     PetscCDIntNd *pos, *pos2;
1152*8be712e4SBarry Smith     PetscCall(PetscViewerASCIIPrintf(viewer, "%d matching steps with threshold = %g\n", (int)coarse->max_it, (double)coarse->threshold));
1153*8be712e4SBarry Smith     PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1154*8be712e4SBarry Smith     for (PetscInt kk = 0; kk < coarse->agg_lists->size; kk++) {
1155*8be712e4SBarry Smith       PetscCall(PetscCDGetHeadPos(coarse->agg_lists, kk, &pos));
1156*8be712e4SBarry Smith       if ((pos2 = pos)) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "selected local %d: ", (int)kk));
1157*8be712e4SBarry Smith       while (pos) {
1158*8be712e4SBarry Smith         PetscInt gid1;
1159*8be712e4SBarry Smith         PetscCall(PetscCDIntNdGetID(pos, &gid1));
1160*8be712e4SBarry Smith         PetscCall(PetscCDGetNextPos(coarse->agg_lists, kk, &pos));
1161*8be712e4SBarry Smith         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " %d ", (int)gid1));
1162*8be712e4SBarry Smith       }
1163*8be712e4SBarry Smith       if (pos2) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n"));
1164*8be712e4SBarry Smith     }
1165*8be712e4SBarry Smith     PetscCall(PetscViewerFlush(viewer));
1166*8be712e4SBarry Smith     PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1167*8be712e4SBarry Smith   }
1168*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
1169*8be712e4SBarry Smith }
1170*8be712e4SBarry Smith 
1171*8be712e4SBarry Smith /*
1172*8be712e4SBarry Smith   MatCoarsenCreate_HEM - A coarsener that uses HEM a simple greedy coarsener
1173*8be712e4SBarry Smith */
1174*8be712e4SBarry Smith PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse)
1175*8be712e4SBarry Smith {
1176*8be712e4SBarry Smith   PetscFunctionBegin;
1177*8be712e4SBarry Smith   coarse->ops->apply = MatCoarsenApply_HEM;
1178*8be712e4SBarry Smith   coarse->ops->view  = MatCoarsenView_HEM;
1179*8be712e4SBarry Smith   coarse->max_it     = 4;
1180*8be712e4SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
1181*8be712e4SBarry Smith }
1182