xref: /petsc/src/ksp/pc/impls/tfs/xxt.c (revision e0c929782f653733b5b44051f7bcff439f1f56d2)
1 #define PETSCKSP_DLL
2 
3 /*************************************xxt.c************************************
4 Module Name: xxt
5 Module Info:
6 
7 author:  Henry M. Tufo III
8 e-mail:  hmt@asci.uchicago.edu
9 contact:
10 +--------------------------------+--------------------------------+
11 |MCS Division - Building 221     |Department of Computer Science  |
12 |Argonne National Laboratory     |Ryerson 152                     |
13 |9700 S. Cass Avenue             |The University of Chicago       |
14 |Argonne, IL  60439              |Chicago, IL  60637              |
15 |(630) 252-5354/5986 ph/fx       |(773) 702-6019/8487 ph/fx       |
16 +--------------------------------+--------------------------------+
17 
18 Last Modification: 3.20.01
19 **************************************xxt.c***********************************/
20 
21 
22 /*************************************xxt.c************************************
23 NOTES ON USAGE:
24 
25 **************************************xxt.c***********************************/
26 #include "src/ksp/pc/impls/tfs/tfs.h"
27 
28 #define LEFT  -1
29 #define RIGHT  1
30 #define BOTH   0
31 
32 typedef struct xxt_solver_info {
33   int n, m, n_global, m_global;
34   int nnz, max_nnz, msg_buf_sz;
35   int *nsep, *lnsep, *fo, nfo, *stages;
36   int *col_sz, *col_indices;
37   PetscScalar **col_vals, *x, *solve_uu, *solve_w;
38   int nsolves;
39   PetscScalar tot_solve_time;
40 } xxt_info;
41 
42 typedef struct matvec_info {
43   int n, m, n_global, m_global;
44   int *local2global;
45   gs_ADT gs_handle;
46   PetscErrorCode (*matvec)(struct matvec_info*,PetscScalar*,PetscScalar*);
47   void *grid_data;
48 } mv_info;
49 
50 struct xxt_CDT{
51   int id;
52   int ns;
53   int level;
54   xxt_info *info;
55   mv_info  *mvi;
56 };
57 
58 static int n_xxt=0;
59 static int n_xxt_handles=0;
60 
61 /* prototypes */
62 static PetscErrorCode do_xxt_solve(xxt_ADT xxt_handle, PetscScalar *rhs);
63 static PetscErrorCode check_handle(xxt_ADT xxt_handle);
64 static PetscErrorCode det_separators(xxt_ADT xxt_handle);
65 static PetscErrorCode do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u);
66 static int xxt_generate(xxt_ADT xxt_handle);
67 static int do_xxt_factor(xxt_ADT xxt_handle);
68 static mv_info *set_mvi(int *local2global, int n, int m, void *matvec, void *grid_data);
69 
70 
71 
72 /*************************************xxt.c************************************
73 Function: XXT_new()
74 
75 Input :
76 Output:
77 Return:
78 Description:
79 **************************************xxt.c***********************************/
80 xxt_ADT
81 XXT_new(void)
82 {
83   xxt_ADT xxt_handle;
84 
85 
86 
87   /* rolling count on n_xxt ... pot. problem here */
88   n_xxt_handles++;
89   xxt_handle       = (xxt_ADT)malloc(sizeof(struct xxt_CDT));
90   xxt_handle->id   = ++n_xxt;
91   xxt_handle->info = NULL; xxt_handle->mvi  = NULL;
92 
93   return(xxt_handle);
94 }
95 
96 
97 /*************************************xxt.c************************************
98 Function: XXT_factor()
99 
100 Input :
101 Output:
102 Return:
103 Description:
104 **************************************xxt.c***********************************/
105 int
106 XXT_factor(xxt_ADT xxt_handle, /* prev. allocated xxt  handle */
107 	   int *local2global,  /* global column mapping       */
108 	   int n,              /* local num rows              */
109 	   int m,              /* local num cols              */
110 	   void *matvec,       /* b_loc=A_local.x_loc         */
111 	   void *grid_data     /* grid data for matvec        */
112 	   )
113 {
114   comm_init();
115   check_handle(xxt_handle);
116 
117   /* only 2^k for now and all nodes participating */
118   if ((1<<(xxt_handle->level=i_log2_num_nodes))!=num_nodes)
119     {error_msg_fatal("only 2^k for now and MPI_COMM_WORLD!!! %d != %d\n",1<<i_log2_num_nodes,num_nodes);}
120 
121   /* space for X info */
122   xxt_handle->info = (xxt_info*)malloc(sizeof(xxt_info));
123 
124   /* set up matvec handles */
125   xxt_handle->mvi  = set_mvi(local2global, n, m, matvec, grid_data);
126 
127   /* matrix is assumed to be of full rank */
128   /* LATER we can reset to indicate rank def. */
129   xxt_handle->ns=0;
130 
131   /* determine separators and generate firing order - NB xxt info set here */
132   det_separators(xxt_handle);
133 
134   return(do_xxt_factor(xxt_handle));
135 }
136 
137 
138 /*************************************xxt.c************************************
139 Function: XXT_solve
140 
141 Input :
142 Output:
143 Return:
144 Description:
145 **************************************xxt.c***********************************/
146 int
147 XXT_solve(xxt_ADT xxt_handle, double *x, double *b)
148 {
149 
150   comm_init();
151   check_handle(xxt_handle);
152 
153   /* need to copy b into x? */
154   if (b)
155     {rvec_copy(x,b,xxt_handle->mvi->n);}
156   do_xxt_solve(xxt_handle,x);
157 
158   return(0);
159 }
160 
161 
162 /*************************************xxt.c************************************
163 Function: XXT_free()
164 
165 Input :
166 Output:
167 Return:
168 Description:
169 **************************************xxt.c***********************************/
170 int
171 XXT_free(xxt_ADT xxt_handle)
172 {
173 
174   comm_init();
175   check_handle(xxt_handle);
176   n_xxt_handles--;
177 
178   free(xxt_handle->info->nsep);
179   free(xxt_handle->info->lnsep);
180   free(xxt_handle->info->fo);
181   free(xxt_handle->info->stages);
182   free(xxt_handle->info->solve_uu);
183   free(xxt_handle->info->solve_w);
184   free(xxt_handle->info->x);
185   free(xxt_handle->info->col_vals);
186   free(xxt_handle->info->col_sz);
187   free(xxt_handle->info->col_indices);
188   free(xxt_handle->info);
189   free(xxt_handle->mvi->local2global);
190    gs_free(xxt_handle->mvi->gs_handle);
191   free(xxt_handle->mvi);
192   free(xxt_handle);
193 
194 
195 
196   /* if the check fails we nuke */
197   /* if NULL pointer passed to free we nuke */
198   /* if the calls to free fail that's not my problem */
199   return(0);
200 }
201 
202 
203 
204 /*************************************xxt.c************************************
205 Function:
206 
207 Input :
208 Output:
209 Return:
210 Description:
211 **************************************xxt.c***********************************/
212 int
213 XXT_stats(xxt_ADT xxt_handle)
214 {
215   int  op[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD};
216   int fop[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD};
217   int   vals[9],  work[9];
218   PetscScalar fvals[3], fwork[3];
219 
220 
221 
222   comm_init();
223   check_handle(xxt_handle);
224 
225   /* if factorization not done there are no stats */
226   if (!xxt_handle->info||!xxt_handle->mvi)
227     {
228       if (!my_id)
229 	{printf("XXT_stats() :: no stats available!\n");}
230       return 1;
231     }
232 
233   vals[0]=vals[1]=vals[2]=xxt_handle->info->nnz;
234   vals[3]=vals[4]=vals[5]=xxt_handle->mvi->n;
235   vals[6]=vals[7]=vals[8]=xxt_handle->info->msg_buf_sz;
236   giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);
237 
238   fvals[0]=fvals[1]=fvals[2]
239     =xxt_handle->info->tot_solve_time/xxt_handle->info->nsolves++;
240   grop(fvals,fwork,sizeof(fop)/sizeof(fop[0])-1,fop);
241 
242   if (!my_id)
243     {
244       printf("%d :: min   xxt_nnz=%d\n",my_id,vals[0]);
245       printf("%d :: max   xxt_nnz=%d\n",my_id,vals[1]);
246       printf("%d :: avg   xxt_nnz=%g\n",my_id,1.0*vals[2]/num_nodes);
247       printf("%d :: tot   xxt_nnz=%d\n",my_id,vals[2]);
248       printf("%d :: xxt   C(2d)  =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.5)));
249       printf("%d :: xxt   C(3d)  =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.6667)));
250       printf("%d :: min   xxt_n  =%d\n",my_id,vals[3]);
251       printf("%d :: max   xxt_n  =%d\n",my_id,vals[4]);
252       printf("%d :: avg   xxt_n  =%g\n",my_id,1.0*vals[5]/num_nodes);
253       printf("%d :: tot   xxt_n  =%d\n",my_id,vals[5]);
254       printf("%d :: min   xxt_buf=%d\n",my_id,vals[6]);
255       printf("%d :: max   xxt_buf=%d\n",my_id,vals[7]);
256       printf("%d :: avg   xxt_buf=%g\n",my_id,1.0*vals[8]/num_nodes);
257       printf("%d :: min   xxt_slv=%g\n",my_id,fvals[0]);
258       printf("%d :: max   xxt_slv=%g\n",my_id,fvals[1]);
259       printf("%d :: avg   xxt_slv=%g\n",my_id,fvals[2]/num_nodes);
260     }
261 
262   return(0);
263 }
264 
265 
266 /*************************************xxt.c************************************
267 Function: do_xxt_factor
268 
269 Input :
270 Output:
271 Return:
272 Description: get A_local, local portion of global coarse matrix which
273 is a row dist. nxm matrix w/ n<m.
274    o my_ml holds address of ML struct associated w/A_local and coarse grid
275    o local2global holds global number of column i (i=0,...,m-1)
276    o local2global holds global number of row    i (i=0,...,n-1)
277    o mylocmatvec performs A_local . vec_local (note that gs is performed using
278    gs_init/gop).
279 
280 mylocmatvec = my_ml->Amat[grid_tag].matvec->external;
281 mylocmatvec (void :: void *data, double *in, double *out)
282 **************************************xxt.c***********************************/
283 static int do_xxt_factor(xxt_ADT xxt_handle)
284 {
285   int flag;
286 
287 
288   flag=xxt_generate(xxt_handle);
289 
290   return(flag);
291 }
292 
293 
294 /*************************************xxt.c************************************
295 Function:
296 
297 Input :
298 Output:
299 Return:
300 Description:
301 **************************************xxt.c***********************************/
302 static int xxt_generate(xxt_ADT xxt_handle)
303 {
304   int i,j,k,idex;
305   int dim, col;
306   PetscScalar *u, *uu, *v, *z, *w, alpha, alpha_w;
307   int *segs;
308   int op[] = {GL_ADD,0};
309   int off, len;
310   PetscScalar *x_ptr;
311   int *iptr, flag;
312   int start=0, end, work;
313   int op2[] = {GL_MIN,0};
314   gs_ADT gs_handle;
315   int *nsep, *lnsep, *fo;
316   int a_n=xxt_handle->mvi->n;
317   int a_m=xxt_handle->mvi->m;
318   int *a_local2global=xxt_handle->mvi->local2global;
319   int level;
320   int xxt_nnz=0, xxt_max_nnz=0;
321   int n, m;
322   int *col_sz, *col_indices, *stages;
323   PetscScalar **col_vals, *x;
324   int n_global;
325   int xxt_zero_nnz=0;
326   int xxt_zero_nnz_0=0;
327   PetscBLASInt i1 = 1;
328   PetscScalar dm1 = -1.0;
329 
330   n=xxt_handle->mvi->n;
331   nsep=xxt_handle->info->nsep;
332   lnsep=xxt_handle->info->lnsep;
333   fo=xxt_handle->info->fo;
334   end=lnsep[0];
335   level=xxt_handle->level;
336   gs_handle=xxt_handle->mvi->gs_handle;
337 
338   /* is there a null space? */
339   /* LATER add in ability to detect null space by checking alpha */
340   for (i=0, j=0; i<=level; i++)
341     {j+=nsep[i];}
342 
343   m = j-xxt_handle->ns;
344   if (m!=j)
345     {printf("xxt_generate() :: null space exists %d %d %d\n",m,j,xxt_handle->ns);}
346 
347   /* get and initialize storage for x local         */
348   /* note that x local is nxm and stored by columns */
349   col_sz = (int*) malloc(m*sizeof(PetscInt));
350   col_indices = (int*) malloc((2*m+1)*sizeof(int));
351   col_vals = (PetscScalar **) malloc(m*sizeof(PetscScalar *));
352   for (i=j=0; i<m; i++, j+=2)
353     {
354       col_indices[j]=col_indices[j+1]=col_sz[i]=-1;
355       col_vals[i] = NULL;
356     }
357   col_indices[j]=-1;
358 
359   /* size of separators for each sub-hc working from bottom of tree to top */
360   /* this looks like nsep[]=segments */
361   stages = (int*) malloc((level+1)*sizeof(PetscInt));
362   segs   = (int*) malloc((level+1)*sizeof(PetscInt));
363   ivec_zero(stages,level+1);
364   ivec_copy(segs,nsep,level+1);
365   for (i=0; i<level; i++)
366     {segs[i+1] += segs[i];}
367   stages[0] = segs[0];
368 
369   /* temporary vectors  */
370   u  = (PetscScalar *) malloc(n*sizeof(PetscScalar));
371   z  = (PetscScalar *) malloc(n*sizeof(PetscScalar));
372   v  = (PetscScalar *) malloc(a_m*sizeof(PetscScalar));
373   uu = (PetscScalar *) malloc(m*sizeof(PetscScalar));
374   w  = (PetscScalar *) malloc(m*sizeof(PetscScalar));
375 
376   /* extra nnz due to replication of vertices across separators */
377   for (i=1, j=0; i<=level; i++)
378     {j+=nsep[i];}
379 
380   /* storage for sparse x values */
381   n_global = xxt_handle->info->n_global;
382   xxt_max_nnz = (int)(2.5*pow(1.0*n_global,1.6667) + j*n/2)/num_nodes;
383   x = (PetscScalar *) malloc(xxt_max_nnz*sizeof(PetscScalar));
384   xxt_nnz = 0;
385 
386   /* LATER - can embed next sep to fire in gs */
387   /* time to make the donuts - generate X factor */
388   for (dim=i=j=0;i<m;i++)
389     {
390       /* time to move to the next level? */
391       while (i==segs[dim])
392 	{
393 	  if (dim==level)
394 	    {error_msg_fatal("dim about to exceed level\n"); break;}
395 
396 	  stages[dim++]=i;
397 	  end+=lnsep[dim];
398 	}
399       stages[dim]=i;
400 
401       /* which column are we firing? */
402       /* i.e. set v_l */
403       /* use new seps and do global min across hc to determine which one to fire */
404       (start<end) ? (col=fo[start]) : (col=INT_MAX);
405       giop_hc(&col,&work,1,op2,dim);
406 
407       /* shouldn't need this */
408       if (col==INT_MAX)
409 	{
410 	  error_msg_warning("hey ... col==INT_MAX??\n");
411 	  continue;
412 	}
413 
414       /* do I own it? I should */
415       rvec_zero(v ,a_m);
416       if (col==fo[start])
417 	{
418 	  start++;
419 	  idex=ivec_linear_search(col, a_local2global, a_n);
420 	  if (idex!=-1)
421 	    {v[idex] = 1.0; j++;}
422 	  else
423 	    {error_msg_fatal("NOT FOUND!\n");}
424 	}
425       else
426 	{
427 	  idex=ivec_linear_search(col, a_local2global, a_m);
428 	  if (idex!=-1)
429 	    {v[idex] = 1.0;}
430 	}
431 
432       /* perform u = A.v_l */
433       rvec_zero(u,n);
434       do_matvec(xxt_handle->mvi,v,u);
435 
436       /* uu =  X^T.u_l (local portion) */
437       /* technically only need to zero out first i entries */
438       /* later turn this into an XXT_solve call ? */
439       rvec_zero(uu,m);
440       x_ptr=x;
441       iptr = col_indices;
442       for (k=0; k<i; k++)
443 	{
444 	  off = *iptr++;
445 	  len = *iptr++;
446 
447 	  uu[k] = BLASdot_(&len,u+off,&i1,x_ptr,&i1);
448 	  x_ptr+=len;
449 	}
450 
451 
452       /* uu = X^T.u_l (comm portion) */
453       ssgl_radd  (uu, w, dim, stages);
454 
455       /* z = X.uu */
456       rvec_zero(z,n);
457       x_ptr=x;
458       iptr = col_indices;
459       for (k=0; k<i; k++)
460 	{
461 	  off = *iptr++;
462 	  len = *iptr++;
463 
464 	  BLASaxpy_(&len,&uu[k],x_ptr,&i1,z+off,&i1);
465 	  x_ptr+=len;
466 	}
467 
468       /* compute v_l = v_l - z */
469       rvec_zero(v+a_n,a_m-a_n);
470       BLASaxpy_(&n,&dm1,z,&i1,v,&i1);
471 
472       /* compute u_l = A.v_l */
473       if (a_n!=a_m)
474 	{gs_gop_hc(gs_handle,v,"+\0",dim);}
475       rvec_zero(u,n);
476       do_matvec(xxt_handle->mvi,v,u);
477 
478       /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - local portion */
479       alpha = BLASdot_(&n,u,&i1,v,&i1);
480       /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - comm portion */
481       grop_hc(&alpha, &alpha_w, 1, op, dim);
482 
483       alpha = (PetscScalar) sqrt((double)alpha);
484 
485       /* check for small alpha                             */
486       /* LATER use this to detect and determine null space */
487       if (fabs(alpha)<1.0e-14)
488 	{error_msg_fatal("bad alpha! %g\n",alpha);}
489 
490       /* compute v_l = v_l/sqrt(alpha) */
491       rvec_scale(v,1.0/alpha,n);
492 
493       /* add newly generated column, v_l, to X */
494       flag = 1;
495       off=len=0;
496       for (k=0; k<n; k++)
497 	{
498 	  if (v[k]!=0.0)
499 	    {
500 	      len=k;
501 	      if (flag)
502 		{off=k; flag=0;}
503 	    }
504 	}
505 
506       len -= (off-1);
507 
508       if (len>0)
509 	{
510 	  if ((xxt_nnz+len)>xxt_max_nnz)
511 	    {
512 	      error_msg_warning("increasing space for X by 2x!\n");
513 	      xxt_max_nnz *= 2;
514 	      x_ptr = (PetscScalar *) malloc(xxt_max_nnz*sizeof(PetscScalar));
515 	      rvec_copy(x_ptr,x,xxt_nnz);
516 	      free(x);
517 	      x = x_ptr;
518 	      x_ptr+=xxt_nnz;
519 	    }
520 	  xxt_nnz += len;
521 	  rvec_copy(x_ptr,v+off,len);
522 
523           /* keep track of number of zeros */
524 	  if (dim)
525 	    {
526 	      for (k=0; k<len; k++)
527 		{
528 		  if (x_ptr[k]==0.0)
529 		    {xxt_zero_nnz++;}
530 		}
531 	    }
532 	  else
533 	    {
534 	      for (k=0; k<len; k++)
535 		{
536 		  if (x_ptr[k]==0.0)
537 		    {xxt_zero_nnz_0++;}
538 		}
539 	    }
540 	  col_indices[2*i] = off;
541 	  col_sz[i] = col_indices[2*i+1] = len;
542 	  col_vals[i] = x_ptr;
543 	}
544       else
545 	{
546 	  col_indices[2*i] = 0;
547 	  col_sz[i] = col_indices[2*i+1] = 0;
548 	  col_vals[i] = x_ptr;
549 	}
550     }
551 
552   /* close off stages for execution phase */
553   while (dim!=level)
554     {
555       stages[dim++]=i;
556       error_msg_warning("disconnected!!! dim(%d)!=level(%d)\n",dim,level);
557     }
558   stages[dim]=i;
559 
560   xxt_handle->info->n=xxt_handle->mvi->n;
561   xxt_handle->info->m=m;
562   xxt_handle->info->nnz=xxt_nnz;
563   xxt_handle->info->max_nnz=xxt_max_nnz;
564   xxt_handle->info->msg_buf_sz=stages[level]-stages[0];
565   xxt_handle->info->solve_uu = (PetscScalar *) malloc(m*sizeof(PetscScalar));
566   xxt_handle->info->solve_w  = (PetscScalar *) malloc(m*sizeof(PetscScalar));
567   xxt_handle->info->x=x;
568   xxt_handle->info->col_vals=col_vals;
569   xxt_handle->info->col_sz=col_sz;
570   xxt_handle->info->col_indices=col_indices;
571   xxt_handle->info->stages=stages;
572   xxt_handle->info->nsolves=0;
573   xxt_handle->info->tot_solve_time=0.0;
574 
575   free(segs);
576   free(u);
577   free(v);
578   free(uu);
579   free(z);
580   free(w);
581 
582   return(0);
583 }
584 
585 
586 /*************************************xxt.c************************************
587 Function:
588 
589 Input :
590 Output:
591 Return:
592 Description:
593 **************************************xxt.c***********************************/
594 static PetscErrorCode do_xxt_solve(xxt_ADT xxt_handle,  PetscScalar *uc)
595 {
596    int off, len, *iptr;
597   int level       =xxt_handle->level;
598   int n           =xxt_handle->info->n;
599   int m           =xxt_handle->info->m;
600   int *stages     =xxt_handle->info->stages;
601   int *col_indices=xxt_handle->info->col_indices;
602   PetscScalar *x_ptr, *uu_ptr;
603   PetscScalar *solve_uu=xxt_handle->info->solve_uu;
604   PetscScalar *solve_w =xxt_handle->info->solve_w;
605   PetscScalar *x       =xxt_handle->info->x;
606   PetscBLASInt i1 = 1;
607 
608   PetscFunctionBegin;
609   uu_ptr=solve_uu;
610   rvec_zero(uu_ptr,m);
611 
612   /* x  = X.Y^T.b */
613   /* uu = Y^T.b */
614   for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len)
615     {
616       off=*iptr++; len=*iptr++;
617       *uu_ptr++ = BLASdot_(&len,uc+off,&i1,x_ptr,&i1);
618     }
619 
620   /* comunication of beta */
621   uu_ptr=solve_uu;
622   if (level) {ssgl_radd(uu_ptr, solve_w, level, stages);}
623 
624   rvec_zero(uc,n);
625 
626   /* x = X.uu */
627   for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len)
628     {
629       off=*iptr++; len=*iptr++;
630       BLASaxpy_(&len,uu_ptr++,x_ptr,&i1,uc+off,&i1);
631     }
632   PetscFunctionReturn(0);
633 }
634 
635 /*************************************xxt.c************************************
636 Function: check_handle()
637 
638 Input :
639 Output:
640 Return:
641 Description:
642 **************************************xxt.c***********************************/
643 static PetscErrorCode check_handle(xxt_ADT xxt_handle)
644 {
645   int vals[2], work[2], op[] = {NON_UNIFORM,GL_MIN,GL_MAX};
646 
647   PetscFunctionBegin;
648   if (xxt_handle==NULL)
649     {error_msg_fatal("check_handle() :: bad handle :: NULL %d\n",xxt_handle);}
650 
651   vals[0]=vals[1]=xxt_handle->id;
652   giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);
653   if ((vals[0]!=vals[1])||(xxt_handle->id<=0))
654     {error_msg_fatal("check_handle() :: bad handle :: id mismatch min/max %d/%d %d\n",
655 		     vals[0],vals[1], xxt_handle->id);}
656   PetscFunctionReturn(0);
657 }
658 
659 
660 /*************************************xxt.c************************************
661 Function: det_separators
662 
663 Input :
664 Output:
665 Return:
666 Description:
667   det_separators(xxt_handle, local2global, n, m, mylocmatvec, grid_data);
668 **************************************xxt.c***********************************/
669 static  PetscErrorCode det_separators(xxt_ADT xxt_handle)
670 {
671   int i, ct, id;
672   int mask, edge, *iptr;
673   int *dir, *used;
674   int sum[4], w[4];
675   PetscScalar rsum[4], rw[4];
676   int op[] = {GL_ADD,0};
677   PetscScalar *lhs, *rhs;
678   int *nsep, *lnsep, *fo, nfo=0;
679   gs_ADT gs_handle=xxt_handle->mvi->gs_handle;
680   int *local2global=xxt_handle->mvi->local2global;
681   int  n=xxt_handle->mvi->n;
682   int  m=xxt_handle->mvi->m;
683   int level=xxt_handle->level;
684   int shared=FALSE;
685 
686   PetscFunctionBegin;
687   dir  = (int*)malloc(sizeof(PetscInt)*(level+1));
688   nsep = (int*)malloc(sizeof(PetscInt)*(level+1));
689   lnsep= (int*)malloc(sizeof(PetscInt)*(level+1));
690   fo   = (int*)malloc(sizeof(PetscInt)*(n+1));
691   used = (int*)malloc(sizeof(PetscInt)*n);
692 
693   ivec_zero(dir  ,level+1);
694   ivec_zero(nsep ,level+1);
695   ivec_zero(lnsep,level+1);
696   ivec_set (fo   ,-1,n+1);
697   ivec_zero(used,n);
698 
699   lhs  = (double*)malloc(sizeof(PetscScalar)*m);
700   rhs  = (double*)malloc(sizeof(PetscScalar)*m);
701 
702   /* determine the # of unique dof */
703   rvec_zero(lhs,m);
704   rvec_set(lhs,1.0,n);
705   gs_gop_hc(gs_handle,lhs,"+\0",level);
706   rvec_zero(rsum,2);
707   for (ct=i=0;i<n;i++)
708     {
709       if (lhs[i]!=0.0)
710 	{rsum[0]+=1.0/lhs[i]; rsum[1]+=lhs[i];}
711     }
712   grop_hc(rsum,rw,2,op,level);
713   rsum[0]+=0.1;
714   rsum[1]+=0.1;
715   /*  if (!my_id)
716     {
717       printf("xxt n unique = %d (%g)\n",(int) rsum[0], rsum[0]);
718       printf("xxt n shared = %d (%g)\n",(int) rsum[1], rsum[1]);
719       }*/
720 
721   if (fabs(rsum[0]-rsum[1])>EPS)
722     {shared=TRUE;}
723 
724   xxt_handle->info->n_global=xxt_handle->info->m_global=(int) rsum[0];
725   xxt_handle->mvi->n_global =xxt_handle->mvi->m_global =(int) rsum[0];
726 
727   /* determine separator sets top down */
728   if (shared)
729     {
730       for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1)
731 	{
732 	  /* set rsh of hc, fire, and collect lhs responses */
733 	  (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m);
734 	  gs_gop_hc(gs_handle,lhs,"+\0",edge);
735 
736 	  /* set lsh of hc, fire, and collect rhs responses */
737 	  (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m);
738 	  gs_gop_hc(gs_handle,rhs,"+\0",edge);
739 
740 	  for (i=0;i<n;i++)
741 	    {
742 	      if (id< mask)
743 		{
744 		  if (lhs[i]!=0.0)
745 		    {lhs[i]=1.0;}
746 		}
747 	      if (id>=mask)
748 		{
749 		  if (rhs[i]!=0.0)
750 		    {rhs[i]=1.0;}
751 		}
752 	    }
753 
754 	  if (id< mask)
755 	    {gs_gop_hc(gs_handle,lhs,"+\0",edge-1);}
756 	  else
757 	    {gs_gop_hc(gs_handle,rhs,"+\0",edge-1);}
758 
759 	  /* count number of dofs I own that have signal and not in sep set */
760 	  rvec_zero(rsum,4);
761 	  for (ivec_zero(sum,4),ct=i=0;i<n;i++)
762 	    {
763 	      if (!used[i])
764 		{
765 		  /* number of unmarked dofs on node */
766 		  ct++;
767 		  /* number of dofs to be marked on lhs hc */
768 		  if (id< mask)
769 		    {
770 		      if (lhs[i]!=0.0)
771 			{sum[0]++; rsum[0]+=1.0/lhs[i];}
772 		    }
773 		  /* number of dofs to be marked on rhs hc */
774 		  if (id>=mask)
775 		    {
776 		      if (rhs[i]!=0.0)
777 			{sum[1]++; rsum[1]+=1.0/rhs[i];}
778 		    }
779 		}
780 	    }
781 
782 	  /* go for load balance - choose half with most unmarked dofs, bias LHS */
783 	  (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
784 	  (id<mask) ? (rsum[2]=ct) : (rsum[3]=ct);
785 	  giop_hc(sum,w,4,op,edge);
786 	  grop_hc(rsum,rw,4,op,edge);
787 	  rsum[0]+=0.1; rsum[1]+=0.1; rsum[2]+=0.1; rsum[3]+=0.1;
788 
789 	  if (id<mask)
790 	    {
791 	      /* mark dofs I own that have signal and not in sep set */
792 	      for (ct=i=0;i<n;i++)
793 		{
794 		  if ((!used[i])&&(lhs[i]!=0.0))
795 		    {
796 		      ct++; nfo++;
797 
798 		      if (nfo>n)
799 			{error_msg_fatal("nfo about to exceed n\n");}
800 
801 		      *--iptr = local2global[i];
802 		      used[i]=edge;
803 		    }
804 		}
805 	      if (ct>1) {ivec_sort(iptr,ct);}
806 
807 	      lnsep[edge]=ct;
808 	      nsep[edge]=(int) rsum[0];
809 	      dir [edge]=LEFT;
810 	    }
811 
812 	  if (id>=mask)
813 	    {
814 	      /* mark dofs I own that have signal and not in sep set */
815 	      for (ct=i=0;i<n;i++)
816 		{
817 		  if ((!used[i])&&(rhs[i]!=0.0))
818 		    {
819 		      ct++; nfo++;
820 
821 		      if (nfo>n)
822 			{error_msg_fatal("nfo about to exceed n\n");}
823 
824 		      *--iptr = local2global[i];
825 		      used[i]=edge;
826 		    }
827 		}
828 	      if (ct>1) {ivec_sort(iptr,ct);}
829 
830 	      lnsep[edge]=ct;
831 	      nsep[edge]= (int) rsum[1];
832 	      dir [edge]=RIGHT;
833 	    }
834 
835 	  /* LATER or we can recur on these to order seps at this level */
836 	  /* do we need full set of separators for this?                */
837 
838 	  /* fold rhs hc into lower */
839 	  if (id>=mask)
840 	    {id-=mask;}
841 	}
842     }
843   else
844     {
845       for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1)
846 	{
847 	  /* set rsh of hc, fire, and collect lhs responses */
848 	  (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m);
849 	  gs_gop_hc(gs_handle,lhs,"+\0",edge);
850 
851 	  /* set lsh of hc, fire, and collect rhs responses */
852 	  (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m);
853 	  gs_gop_hc(gs_handle,rhs,"+\0",edge);
854 
855 	  /* count number of dofs I own that have signal and not in sep set */
856 	  for (ivec_zero(sum,4),ct=i=0;i<n;i++)
857 	    {
858 	      if (!used[i])
859 		{
860 		  /* number of unmarked dofs on node */
861 		  ct++;
862 		  /* number of dofs to be marked on lhs hc */
863 		  if ((id< mask)&&(lhs[i]!=0.0)) {sum[0]++;}
864 		  /* number of dofs to be marked on rhs hc */
865 		  if ((id>=mask)&&(rhs[i]!=0.0)) {sum[1]++;}
866 		}
867 	    }
868 
869 	  /* go for load balance - choose half with most unmarked dofs, bias LHS */
870 	  (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
871 	  giop_hc(sum,w,4,op,edge);
872 
873 	  /* lhs hc wins */
874 	  if (sum[2]>=sum[3])
875 	    {
876 	      if (id<mask)
877 		{
878 		  /* mark dofs I own that have signal and not in sep set */
879 		  for (ct=i=0;i<n;i++)
880 		    {
881 		      if ((!used[i])&&(lhs[i]!=0.0))
882 			{
883 			  ct++; nfo++;
884 			  *--iptr = local2global[i];
885 			  used[i]=edge;
886 			}
887 		    }
888 		  if (ct>1) {ivec_sort(iptr,ct);}
889 		  lnsep[edge]=ct;
890 		}
891 	      nsep[edge]=sum[0];
892 	      dir [edge]=LEFT;
893 	    }
894 	  /* rhs hc wins */
895 	  else
896 	    {
897 	      if (id>=mask)
898 		{
899 		  /* mark dofs I own that have signal and not in sep set */
900 		  for (ct=i=0;i<n;i++)
901 		    {
902 		      if ((!used[i])&&(rhs[i]!=0.0))
903 			{
904 			  ct++; nfo++;
905 			  *--iptr = local2global[i];
906 			  used[i]=edge;
907 			}
908 		    }
909 		  if (ct>1) {ivec_sort(iptr,ct);}
910 		  lnsep[edge]=ct;
911 		}
912 	      nsep[edge]=sum[1];
913 	      dir [edge]=RIGHT;
914 	    }
915 	  /* LATER or we can recur on these to order seps at this level */
916 	  /* do we need full set of separators for this?                */
917 
918 	  /* fold rhs hc into lower */
919 	  if (id>=mask)
920 	    {id-=mask;}
921 	}
922     }
923 
924 
925   /* level 0 is on processor case - so mark the remainder */
926   for (ct=i=0;i<n;i++)
927     {
928       if (!used[i])
929 	{
930 	  ct++; nfo++;
931 	  *--iptr = local2global[i];
932 	  used[i]=edge;
933 	}
934     }
935   if (ct>1) {ivec_sort(iptr,ct);}
936   lnsep[edge]=ct;
937   nsep [edge]=ct;
938   dir  [edge]=LEFT;
939 
940   xxt_handle->info->nsep=nsep;
941   xxt_handle->info->lnsep=lnsep;
942   xxt_handle->info->fo=fo;
943   xxt_handle->info->nfo=nfo;
944 
945   free(dir);
946   free(lhs);
947   free(rhs);
948   free(used);
949   PetscFunctionReturn(0);
950 }
951 
952 
953 /*************************************xxt.c************************************
954 Function: set_mvi
955 
956 Input :
957 Output:
958 Return:
959 Description:
960 **************************************xxt.c***********************************/
961 static
962 mv_info *set_mvi(int *local2global, int n, int m, void *matvec, void *grid_data)
963 {
964   mv_info *mvi;
965 
966 
967   mvi = (mv_info*)malloc(sizeof(mv_info));
968   mvi->n=n;
969   mvi->m=m;
970   mvi->n_global=-1;
971   mvi->m_global=-1;
972   mvi->local2global=(int*)malloc((m+1)*sizeof(PetscInt));
973   ivec_copy(mvi->local2global,local2global,m);
974   mvi->local2global[m] = INT_MAX;
975   mvi->matvec=(PetscErrorCode (*)(mv_info*,PetscScalar*,PetscScalar*))matvec;
976   mvi->grid_data=grid_data;
977 
978   /* set xxt communication handle to perform restricted matvec */
979   mvi->gs_handle = gs_init(local2global, m, num_nodes);
980 
981   return(mvi);
982 }
983 
984 
985 /*************************************xxt.c************************************
986 Function: set_mvi
987 
988 Input :
989 Output:
990 Return:
991 Description:
992 
993       computes u = A.v
994       do_matvec(xxt_handle->mvi,v,u);
995 **************************************xxt.c***********************************/
996 static PetscErrorCode do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u)
997 {
998   PetscFunctionBegin;
999   A->matvec((mv_info*)A->grid_data,v,u);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 
1004 
1005