xref: /petsc/src/ksp/pc/impls/tfs/xyt.c (revision 2ca5cae0e13a3030e66dfe4e3774e6d1dcb097fa)
1 
2 /*************************************xyt.c************************************
3 Module Name: xyt
4 Module Info:
5 
6 author:  Henry M. Tufo III
7 e-mail:  hmt@asci.uchicago.edu
8 contact:
9 +--------------------------------+--------------------------------+
10 |MCS Division - Building 221     |Department of Computer Science  |
11 |Argonne National Laboratory     |Ryerson 152                     |
12 |9700 S. Cass Avenue             |The University of Chicago       |
13 |Argonne, IL  60439              |Chicago, IL  60637              |
14 |(630) 252-5354/5986 ph/fx       |(773) 702-6019/8487 ph/fx       |
15 +--------------------------------+--------------------------------+
16 
17 Last Modification: 3.20.01
18 **************************************xyt.c***********************************/
19 
20 
21 /*************************************xyt.c************************************
22 NOTES ON USAGE:
23 
24 **************************************xyt.c***********************************/
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <limits.h>
28 #include <float.h>
29 #include <math.h>
30 #if   defined NXSRC
31 #include <nx.h>
32 #elif defined MPISRC
33 #include <mpi.h>
34 #endif
35 
36 #include "const.h"
37 #include "types.h"
38 #include "comm.h"
39 #include "error.h"
40 #include "ivec.h"
41 #include "bss_malloc.h"
42 #include "queue.h"
43 #include "gs.h"
44 #ifdef MLSRC
45 #include "ml_include.h"
46 #endif
47 #include "blas.h"
48 #include "xyt.h"
49 
50 #define LEFT  -1
51 #define RIGHT  1
52 #define BOTH   0
53 #define MAX_FORTRAN_HANDLES  10
54 
55 typedef struct xyt_solver_info {
56   int n, m, n_global, m_global;
57   int nnz, max_nnz, msg_buf_sz;
58   int *nsep, *lnsep, *fo, nfo, *stages;
59   int *xcol_sz, *xcol_indices;
60   REAL **xcol_vals, *x, *solve_uu, *solve_w;
61   int *ycol_sz, *ycol_indices;
62   REAL **ycol_vals, *y;
63   int nsolves;
64   REAL tot_solve_time;
65 } xyt_info;
66 
67 
68 typedef struct matvec_info {
69   int n, m, n_global, m_global;
70   int *local2global;
71   gs_ADT gs_handle;
72   PetscErrorCode (*matvec)(struct matvec_info*,REAL*,REAL*);
73   void *grid_data;
74 } mv_info;
75 
76 struct xyt_CDT{
77   int id;
78   int ns;
79   int level;
80   xyt_info *info;
81   mv_info  *mvi;
82 };
83 
84 static int n_xyt=0;
85 static int n_xyt_handles=0;
86 
87 /* prototypes */
88 static void do_xyt_solve(xyt_ADT xyt_handle, REAL *rhs);
89 static void check_init(void);
90 static void check_handle(xyt_ADT xyt_handle);
91 static void det_separators(xyt_ADT xyt_handle);
92 static void do_matvec(mv_info *A, REAL *v, REAL *u);
93 static int xyt_generate(xyt_ADT xyt_handle);
94 static int do_xyt_factor(xyt_ADT xyt_handle);
95 static mv_info *set_mvi(int *local2global, int n, int m, void *matvec, void *grid_data);
96 #ifdef MLSRC
97 void ML_XYT_solve(xyt_ADT xyt_handle, int lx, double *x, int lb, double *b);
98 PetscErrorCode  ML_XYT_factor(xyt_ADT xyt_handle, int *local2global, int n, int m,
99 		   void *matvec, void *grid_data, int grid_tag, ML *my_ml);
100 #endif
101 
102 
103 /*************************************xyt.c************************************
104 Function: XYT_new()
105 
106 Input :
107 Output:
108 Return:
109 Description:
110 **************************************xyt.c***********************************/
111 xyt_ADT
112 XYT_new(void)
113 {
114   xyt_ADT xyt_handle;
115 
116 
117 #ifdef DEBUG
118   error_msg_warning("XYT_new() :: start %d\n",n_xyt_handles);
119 #endif
120 
121   /* rolling count on n_xyt ... pot. problem here */
122   n_xyt_handles++;
123   xyt_handle       = (xyt_ADT)bss_malloc(sizeof(struct xyt_CDT));
124   xyt_handle->id   = ++n_xyt;
125   xyt_handle->info = NULL;
126   xyt_handle->mvi  = NULL;
127 
128 #ifdef DEBUG
129   error_msg_warning("XYT_new() :: end   %d\n",n_xyt_handles);
130 #endif
131 
132   return(xyt_handle);
133 }
134 
135 
136 /*************************************xyt.c************************************
137 Function: XYT_factor()
138 
139 Input :
140 Output:
141 Return:
142 Description:
143 **************************************xyt.c***********************************/
144 int
145 XYT_factor(xyt_ADT xyt_handle, /* prev. allocated xyt  handle */
146 	   int *local2global,  /* global column mapping       */
147 	   int n,              /* local num rows              */
148 	   int m,              /* local num cols              */
149 	   void *matvec,       /* b_loc=A_local.x_loc         */
150 	   void *grid_data     /* grid data for matvec        */
151 	   )
152 {
153 #ifdef DEBUG
154   int flag;
155 
156 
157   error_msg_warning("XYT_factor() :: start %d\n",n_xyt_handles);
158 #endif
159 
160   check_init();
161   check_handle(xyt_handle);
162 
163   /* only 2^k for now and all nodes participating */
164   if ((1<<(xyt_handle->level=i_log2_num_nodes))!=num_nodes)
165     {error_msg_fatal("only 2^k for now and MPI_COMM_WORLD!!! %d != %d\n",1<<i_log2_num_nodes,num_nodes);}
166 
167   /* space for X info */
168   xyt_handle->info = (xyt_info*)bss_malloc(sizeof(xyt_info));
169 
170   /* set up matvec handles */
171   xyt_handle->mvi  = set_mvi(local2global, n, m, matvec, grid_data);
172 
173   /* matrix is assumed to be of full rank */
174   /* LATER we can reset to indicate rank def. */
175   xyt_handle->ns=0;
176 
177   /* determine separators and generate firing order - NB xyt info set here */
178   det_separators(xyt_handle);
179 
180 #ifdef DEBUG
181   flag = do_xyt_factor(xyt_handle);
182   error_msg_warning("XYT_factor() :: end   %d (flag=%d)\n",n_xyt_handles,flag);
183   return(flag);
184 #else
185   return(do_xyt_factor(xyt_handle));
186 #endif
187 }
188 
189 
190 /*************************************xyt.c************************************
191 Function: XYT_solve
192 
193 Input :
194 Output:
195 Return:
196 Description:
197 **************************************xyt.c***********************************/
198 int
199 XYT_solve(xyt_ADT xyt_handle, double *x, double *b)
200 {
201 #if defined( NXSRC) && defined(TIMING)
202   double dclock(),    time=0.0;
203 #elif defined(MPISRC) && defined(TIMING)
204   double MPI_Wtime(), time=0.0;
205 #endif
206 #ifdef INFO
207   REAL vals[3], work[3];
208   int op[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD};
209 #endif
210 
211 
212 #ifdef DEBUG
213   error_msg_warning("XYT_solve() :: start %d\n",n_xyt_handles);
214 #endif
215 
216   check_init();
217   check_handle(xyt_handle);
218 
219 #if defined(NXSRC) && defined(TIMING)
220   time = dclock();
221 #elif defined(MPISRC) && defined(TIMING)
222   time = MPI_Wtime();
223 #endif
224 
225   /* need to copy b into x? */
226   if (b)
227     {rvec_copy(x,b,xyt_handle->mvi->n);}
228   do_xyt_solve(xyt_handle,x);
229 
230 #if defined(NXSRC) && defined(TIMING)
231   time = dclock() - time;
232 #elif defined(MPISRC) && defined(TIMING)
233   time = MPI_Wtime() - time;
234 #endif
235 
236 #ifdef TIMING
237   xyt_handle->info->nsolves++;
238   xyt_handle->info->tot_solve_time+=time;
239 
240 #ifdef INFO
241   vals[0]=vals[1]=vals[2]= (REAL) time;
242   grop(vals,work,sizeof(op)/sizeof(op[0])-1,op);
243   if (!my_id)
244     {
245       printf("%d :: min   xyt_slv=%g\n",my_id,vals[0]);
246       printf("%d :: max   xyt_slv=%g\n",my_id,vals[1]);
247       printf("%d :: avg   xyt_slv=%g\n",my_id,vals[2]/num_nodes);
248     }
249 #endif
250 #endif
251 
252 #ifdef DEBUG
253   error_msg_warning("XYT_solve() :: end   %d\n",n_xyt_handles);
254 #endif
255 
256   return(0);
257 }
258 
259 
260 /*************************************xyt.c************************************
261 Function: XYT_free()
262 
263 Input :
264 Output:
265 Return:
266 Description:
267 **************************************xyt.c***********************************/
268 int
269 XYT_free(xyt_ADT xyt_handle)
270 {
271 #ifdef DEBUG
272   error_msg_warning("XYT_free() :: start %d\n",n_xyt_handles);
273 #endif
274 
275   check_init();
276   check_handle(xyt_handle);
277   n_xyt_handles--;
278 
279   bss_free(xyt_handle->info->nsep);
280   bss_free(xyt_handle->info->lnsep);
281   bss_free(xyt_handle->info->fo);
282   bss_free(xyt_handle->info->stages);
283   bss_free(xyt_handle->info->solve_uu);
284   bss_free(xyt_handle->info->solve_w);
285   bss_free(xyt_handle->info->x);
286   bss_free(xyt_handle->info->xcol_vals);
287   bss_free(xyt_handle->info->xcol_sz);
288   bss_free(xyt_handle->info->xcol_indices);
289   bss_free(xyt_handle->info->y);
290   bss_free(xyt_handle->info->ycol_vals);
291   bss_free(xyt_handle->info->ycol_sz);
292   bss_free(xyt_handle->info->ycol_indices);
293   bss_free(xyt_handle->info);
294   bss_free(xyt_handle->mvi->local2global);
295    gs_free(xyt_handle->mvi->gs_handle);
296   bss_free(xyt_handle->mvi);
297   bss_free(xyt_handle);
298 
299 
300 #ifdef DEBUG
301   error_msg_warning("perm frees = %d\n",perm_frees());
302   error_msg_warning("perm calls = %d\n",perm_calls());
303   error_msg_warning("bss frees  = %d\n",bss_frees());
304   error_msg_warning("bss calls  = %d\n",bss_calls());
305   error_msg_warning("XYT_free() :: end   %d\n",n_xyt_handles);
306 #endif
307 
308   /* if the check fails we nuke */
309   /* if NULL pointer passed to bss_free we nuke */
310   /* if the calls to free fail that's not my problem */
311   return(0);
312 }
313 
314 
315 #ifdef MLSRC
316 /*************************************xyt.c************************************
317 Function: ML_XYT_factor()
318 
319 Input :
320 Output:
321 Return:
322 Description:
323 
324 ML requires that the solver call be checked in
325 **************************************xyt.c***********************************/
326 PetscErrorCode
327 ML_XYT_factor(xyt_ADT xyt_handle,  /* prev. allocated xyt  handle */
328 	      int *local2global,   /* global column mapping       */
329 	      int n,               /* local num rows              */
330 	      int m,               /* local num cols              */
331 	      void *matvec,        /* b_loc=A_local.x_loc         */
332 	      void *grid_data,     /* grid data for matvec        */
333 	      int grid_tag,        /* grid tag for ML_Set_CSolve  */
334 	      ML *my_ml            /* ML handle                   */
335 	      )
336 {
337 #ifdef DEBUG
338   int flag;
339 #endif
340 
341 
342 #ifdef DEBUG
343   error_msg_warning("ML_XYT_factor() :: start %d\n",n_xyt_handles);
344 #endif
345 
346   check_init();
347   check_handle(xyt_handle);
348   if (my_ml->comm->ML_mypid!=my_id)
349     {error_msg_fatal("ML_XYT_factor bad my_id %d\t%d\n",
350 		     my_ml->comm->ML_mypid,my_id);}
351   if (my_ml->comm->ML_nprocs!=num_nodes)
352     {error_msg_fatal("ML_XYT_factor bad np %d\t%d\n",
353 		     my_ml->comm->ML_nprocs,num_nodes);}
354 
355   my_ml->SingleLevel[grid_tag].csolve->func->external = ML_XYT_solve;
356   my_ml->SingleLevel[grid_tag].csolve->func->ML_id = ML_EXTERNAL;
357   my_ml->SingleLevel[grid_tag].csolve->data = xyt_handle;
358 
359   /* done ML specific stuff ... back to reg sched pgm */
360 #ifdef DEBUG
361   flag = XYT_factor(xyt_handle, local2global, n, m, matvec, grid_data);
362   error_msg_warning("ML_XYT_factor() :: end   %d (flag=%d)\n",n_xyt_handles,flag);
363   return(flag);
364 #else
365   return(XYT_factor(xyt_handle, local2global, n, m, matvec, grid_data));
366 #endif
367 }
368 
369 
370 /*************************************xyt.c************************************
371 Function: ML_XYT_solve
372 
373 Input :
374 Output:
375 Return:
376 Description:
377 **************************************xyt.c***********************************/
378 void
379 ML_XYT_solve(xyt_ADT xyt_handle, int lx, double *sol, int lb, double *rhs)
380 {
381   XYT_solve(xyt_handle, sol, rhs);
382 }
383 #endif
384 
385 
386 /*************************************xyt.c************************************
387 Function:
388 
389 Input :
390 Output:
391 Return:
392 Description:
393 **************************************xyt.c***********************************/
394 int
395 XYT_stats(xyt_ADT xyt_handle)
396 {
397   int  op[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD};
398   int fop[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD};
399   int   vals[9],  work[9];
400   REAL fvals[3], fwork[3];
401 
402 
403 #ifdef DEBUG
404   error_msg_warning("xyt_stats() :: begin\n");
405 #endif
406 
407   check_init();
408   check_handle(xyt_handle);
409 
410   /* if factorization not done there are no stats */
411   if (!xyt_handle->info||!xyt_handle->mvi)
412     {
413       if (!my_id)
414 	{printf("XYT_stats() :: no stats available!\n");}
415       return 1;
416     }
417 
418   vals[0]=vals[1]=vals[2]=xyt_handle->info->nnz;
419   vals[3]=vals[4]=vals[5]=xyt_handle->mvi->n;
420   vals[6]=vals[7]=vals[8]=xyt_handle->info->msg_buf_sz;
421   giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);
422 
423   fvals[0]=fvals[1]=fvals[2]
424     =xyt_handle->info->tot_solve_time/xyt_handle->info->nsolves++;
425   grop(fvals,fwork,sizeof(fop)/sizeof(fop[0])-1,fop);
426 
427   if (!my_id)
428     {
429       printf("%d :: min   xyt_nnz=%d\n",my_id,vals[0]);
430       printf("%d :: max   xyt_nnz=%d\n",my_id,vals[1]);
431       printf("%d :: avg   xyt_nnz=%g\n",my_id,1.0*vals[2]/num_nodes);
432       printf("%d :: tot   xyt_nnz=%d\n",my_id,vals[2]);
433       printf("%d :: xyt   C(2d)  =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.5)));
434       printf("%d :: xyt   C(3d)  =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.6667)));
435       printf("%d :: min   xyt_n  =%d\n",my_id,vals[3]);
436       printf("%d :: max   xyt_n  =%d\n",my_id,vals[4]);
437       printf("%d :: avg   xyt_n  =%g\n",my_id,1.0*vals[5]/num_nodes);
438       printf("%d :: tot   xyt_n  =%d\n",my_id,vals[5]);
439       printf("%d :: min   xyt_buf=%d\n",my_id,vals[6]);
440       printf("%d :: max   xyt_buf=%d\n",my_id,vals[7]);
441       printf("%d :: avg   xyt_buf=%g\n",my_id,1.0*vals[8]/num_nodes);
442       printf("%d :: min   xyt_slv=%g\n",my_id,fvals[0]);
443       printf("%d :: max   xyt_slv=%g\n",my_id,fvals[1]);
444       printf("%d :: avg   xyt_slv=%g\n",my_id,fvals[2]/num_nodes);
445     }
446 
447 #ifdef DEBUG
448   error_msg_warning("xyt_stats() :: end\n");
449 #endif
450 
451   return(0);
452 }
453 
454 
455 /*************************************xyt.c************************************
456 Function: do_xyt_factor
457 
458 Input :
459 Output:
460 Return:
461 Description: get A_local, local portion of global coarse matrix which
462 is a row dist. nxm matrix w/ n<m.
463    o my_ml holds address of ML struct associated w/A_local and coarse grid
464    o local2global holds global number of column i (i=0,...,m-1)
465    o local2global holds global number of row    i (i=0,...,n-1)
466    o mylocmatvec performs A_local . vec_local (note that gs is performed using
467    gs_init/gop).
468 
469 mylocmatvec = my_ml->Amat[grid_tag].matvec->external;
470 mylocmatvec (void :: void *data, double *in, double *out)
471 **************************************xyt.c***********************************/
472 static
473 int
474 do_xyt_factor(xyt_ADT xyt_handle)
475 {
476   int flag;
477 
478 
479 #ifdef DEBUG
480   error_msg_warning("do_xyt_factor() :: begin\n");
481 #endif
482 
483   flag=xyt_generate(xyt_handle);
484 
485 #ifdef INFO
486   XYT_stats(xyt_handle);
487   bss_stats();
488   perm_stats();
489 #endif
490 
491 #ifdef DEBUG
492   error_msg_warning("do_xyt_factor() :: end\n");
493 #endif
494 
495   return(flag);
496 }
497 
498 
499 /*************************************xyt.c************************************
500 Function:
501 
502 Input :
503 Output:
504 Return:
505 Description:
506 **************************************xyt.c***********************************/
507 static
508 int
509 xyt_generate(xyt_ADT xyt_handle)
510 {
511   int i,j,k,idx;
512   int dim, col;
513   REAL *u, *uu, *v, *z, *w, alpha, alpha_w;
514   int *segs;
515   int op[] = {GL_ADD,0};
516   int off, len;
517   REAL *x_ptr, *y_ptr;
518   int *iptr, flag;
519   int start=0, end, work;
520   int op2[] = {GL_MIN,0};
521   gs_ADT gs_handle;
522   int *nsep, *lnsep, *fo;
523   int a_n=xyt_handle->mvi->n;
524   int a_m=xyt_handle->mvi->m;
525   int *a_local2global=xyt_handle->mvi->local2global;
526   int level;
527   int n, m;
528   int *xcol_sz, *xcol_indices, *stages;
529   REAL **xcol_vals, *x;
530   int *ycol_sz, *ycol_indices;
531   REAL **ycol_vals, *y;
532   int n_global;
533   int xt_nnz=0, xt_max_nnz=0;
534   int yt_nnz=0, yt_max_nnz=0;
535   int xt_zero_nnz  =0;
536   int xt_zero_nnz_0=0;
537   int yt_zero_nnz  =0;
538   int yt_zero_nnz_0=0;
539 
540 
541 #ifdef DEBUG
542   error_msg_warning("xyt_generate() :: begin\n");
543 #endif
544 
545   n=xyt_handle->mvi->n;
546   nsep=xyt_handle->info->nsep;
547   lnsep=xyt_handle->info->lnsep;
548   fo=xyt_handle->info->fo;
549   end=lnsep[0];
550   level=xyt_handle->level;
551   gs_handle=xyt_handle->mvi->gs_handle;
552 
553   /* is there a null space? */
554   /* LATER add in ability to detect null space by checking alpha */
555   for (i=0, j=0; i<=level; i++)
556     {j+=nsep[i];}
557 
558   m = j-xyt_handle->ns;
559   if (m!=j)
560     {printf("xyt_generate() :: null space exists %d %d %d\n",m,j,xyt_handle->ns);}
561 
562   error_msg_warning("xyt_generate() :: X(%d,%d)\n",n,m);
563 
564   /* get and initialize storage for x local         */
565   /* note that x local is nxm and stored by columns */
566   xcol_sz = (int*) bss_malloc(m*INT_LEN);
567   xcol_indices = (int*) bss_malloc((2*m+1)*sizeof(int));
568   xcol_vals = (REAL **) bss_malloc(m*sizeof(REAL *));
569   for (i=j=0; i<m; i++, j+=2)
570     {
571       xcol_indices[j]=xcol_indices[j+1]=xcol_sz[i]=-1;
572       xcol_vals[i] = NULL;
573     }
574   xcol_indices[j]=-1;
575 
576   /* get and initialize storage for y local         */
577   /* note that y local is nxm and stored by columns */
578   ycol_sz = (int*) bss_malloc(m*INT_LEN);
579   ycol_indices = (int*) bss_malloc((2*m+1)*sizeof(int));
580   ycol_vals = (REAL **) bss_malloc(m*sizeof(REAL *));
581   for (i=j=0; i<m; i++, j+=2)
582     {
583       ycol_indices[j]=ycol_indices[j+1]=ycol_sz[i]=-1;
584       ycol_vals[i] = NULL;
585     }
586   ycol_indices[j]=-1;
587 
588   /* size of separators for each sub-hc working from bottom of tree to top */
589   /* this looks like nsep[]=segments */
590   stages = (int*) bss_malloc((level+1)*INT_LEN);
591   segs   = (int*) bss_malloc((level+1)*INT_LEN);
592   ivec_zero(stages,level+1);
593   ivec_copy(segs,nsep,level+1);
594   for (i=0; i<level; i++)
595     {segs[i+1] += segs[i];}
596   stages[0] = segs[0];
597 
598   /* temporary vectors  */
599   u  = (REAL *) bss_malloc(n*sizeof(REAL));
600   z  = (REAL *) bss_malloc(n*sizeof(REAL));
601   v  = (REAL *) bss_malloc(a_m*sizeof(REAL));
602   uu = (REAL *) bss_malloc(m*sizeof(REAL));
603   w  = (REAL *) bss_malloc(m*sizeof(REAL));
604 
605   /* extra nnz due to replication of vertices across separators */
606   for (i=1, j=0; i<=level; i++)
607     {j+=nsep[i];}
608 
609   /* storage for sparse x values */
610   n_global = xyt_handle->info->n_global;
611   xt_max_nnz = yt_max_nnz = (int)(2.5*pow(1.0*n_global,1.6667) + j*n/2)/num_nodes;
612   x = (REAL *) bss_malloc(xt_max_nnz*sizeof(REAL));
613   y = (REAL *) bss_malloc(yt_max_nnz*sizeof(REAL));
614 
615   /* LATER - can embed next sep to fire in gs */
616   /* time to make the donuts - generate X factor */
617   for (dim=i=j=0;i<m;i++)
618     {
619       /* time to move to the next level? */
620       while (i==segs[dim])
621 	{
622 #ifdef SAFE
623 	  if (dim==level)
624 	    {error_msg_fatal("dim about to exceed level\n"); break;}
625 #endif
626 
627 	  stages[dim++]=i;
628 	  end+=lnsep[dim];
629 	}
630       stages[dim]=i;
631 
632       /* which column are we firing? */
633       /* i.e. set v_l */
634       /* use new seps and do global min across hc to determine which one to fire */
635       (start<end) ? (col=fo[start]) : (col=INT_MAX);
636       giop_hc(&col,&work,1,op2,dim);
637 
638       /* shouldn't need this */
639       if (col==INT_MAX)
640 	{
641 	  error_msg_warning("hey ... col==INT_MAX??\n");
642 	  continue;
643 	}
644 
645       /* do I own it? I should */
646       rvec_zero(v ,a_m);
647       if (col==fo[start])
648 	{
649 	  start++;
650 	  idx=ivec_linear_search(col, a_local2global, a_n);
651 	  if (idx!=-1)
652 	    {v[idx] = 1.0; j++;}
653 	  else
654 	    {error_msg_fatal("NOT FOUND!\n");}
655 	}
656       else
657 	{
658 	  idx=ivec_linear_search(col, a_local2global, a_m);
659 	  if (idx!=-1)
660 	    {v[idx] = 1.0;}
661 	}
662 
663       /* perform u = A.v_l */
664       rvec_zero(u,n);
665       do_matvec(xyt_handle->mvi,v,u);
666 
667       /* uu =  X^T.u_l (local portion) */
668       /* technically only need to zero out first i entries */
669       /* later turn this into an XYT_solve call ? */
670       rvec_zero(uu,m);
671       y_ptr=y;
672       iptr = ycol_indices;
673       for (k=0; k<i; k++)
674 	{
675 	  off = *iptr++;
676 	  len = *iptr++;
677 
678 #if   BLAS||CBLAS
679 	  uu[k] = dot(len,u+off,1,y_ptr,1);
680 #else
681 	  uu[k] = rvec_dot(u+off,y_ptr,len);
682 #endif
683 	  y_ptr+=len;
684 	}
685 
686       /* uu = X^T.u_l (comm portion) */
687       ssgl_radd  (uu, w, dim, stages);
688 
689       /* z = X.uu */
690       rvec_zero(z,n);
691       x_ptr=x;
692       iptr = xcol_indices;
693       for (k=0; k<i; k++)
694 	{
695 	  off = *iptr++;
696 	  len = *iptr++;
697 
698 #if   BLAS||CBLAS
699 	  axpy(len,uu[k],x_ptr,1,z+off,1);
700 #else
701 	  rvec_axpy(z+off,x_ptr,uu[k],len);
702 #endif
703 	  x_ptr+=len;
704 	}
705 
706       /* compute v_l = v_l - z */
707       rvec_zero(v+a_n,a_m-a_n);
708 #if   BLAS||CBLAS
709       axpy(n,-1.0,z,1,v,1);
710 #else
711       rvec_axpy(v,z,-1.0,n);
712 #endif
713 
714       /* compute u_l = A.v_l */
715       if (a_n!=a_m)
716 	{gs_gop_hc(gs_handle,v,"+\0",dim);}
717       rvec_zero(u,n);
718      do_matvec(xyt_handle->mvi,v,u);
719 
720       /* compute sqrt(alpha) = sqrt(u_l^T.u_l) - local portion */
721 #if   BLAS||CBLAS
722       alpha = ddot(n,u,1,u,1);
723 #else
724       alpha = rvec_dot(u,u,n);
725 #endif
726       /* compute sqrt(alpha) = sqrt(u_l^T.u_l) - comm portion */
727       grop_hc(&alpha, &alpha_w, 1, op, dim);
728 
729       alpha = (REAL) sqrt((double)alpha);
730 
731       /* check for small alpha                             */
732       /* LATER use this to detect and determine null space */
733 #ifdef r8
734       if (fabs(alpha)<1.0e-14)
735 	{error_msg_fatal("bad alpha! %g\n",alpha);}
736 #else
737       if (fabs((double) alpha) < 1.0e-6)
738 	{error_msg_fatal("bad alpha! %g\n",alpha);}
739 #endif
740 
741       /* compute v_l = v_l/sqrt(alpha) */
742       rvec_scale(v,1.0/alpha,n);
743       rvec_scale(u,1.0/alpha,n);
744 
745       /* add newly generated column, v_l, to X */
746       flag = 1;
747       off=len=0;
748       for (k=0; k<n; k++)
749 	{
750 	  if (v[k]!=0.0)
751 	    {
752 	      len=k;
753 	      if (flag)
754 		{off=k; flag=0;}
755 	    }
756 	}
757 
758       len -= (off-1);
759 
760       if (len>0)
761 	{
762 	  if ((xt_nnz+len)>xt_max_nnz)
763 	    {
764 	      error_msg_warning("increasing space for X by 2x!\n");
765 	      xt_max_nnz *= 2;
766 	      x_ptr = (REAL *) bss_malloc(xt_max_nnz*sizeof(REAL));
767 	      rvec_copy(x_ptr,x,xt_nnz);
768 	      bss_free(x);
769 	      x = x_ptr;
770 	      x_ptr+=xt_nnz;
771 	    }
772 	  xt_nnz += len;
773 	  rvec_copy(x_ptr,v+off,len);
774 
775           /* keep track of number of zeros */
776 	  if (dim)
777 	    {
778 	      for (k=0; k<len; k++)
779 		{
780 		  if (x_ptr[k]==0.0)
781 		    {xt_zero_nnz++;}
782 		}
783 	    }
784 	  else
785 	    {
786 	      for (k=0; k<len; k++)
787 		{
788 		  if (x_ptr[k]==0.0)
789 		    {xt_zero_nnz_0++;}
790 		}
791 	    }
792 	  xcol_indices[2*i] = off;
793 	  xcol_sz[i] = xcol_indices[2*i+1] = len;
794 	  xcol_vals[i] = x_ptr;
795 	}
796       else
797 	{
798 	  xcol_indices[2*i] = 0;
799 	  xcol_sz[i] = xcol_indices[2*i+1] = 0;
800 	  xcol_vals[i] = x_ptr;
801 	}
802 
803 
804       /* add newly generated column, u_l, to Y */
805       flag = 1;
806       off=len=0;
807       for (k=0; k<n; k++)
808 	{
809 	  if (u[k]!=0.0)
810 	    {
811 	      len=k;
812 	      if (flag)
813 		{off=k; flag=0;}
814 	    }
815 	}
816 
817       len -= (off-1);
818 
819       if (len>0)
820 	{
821 	  if ((yt_nnz+len)>yt_max_nnz)
822 	    {
823 	      error_msg_warning("increasing space for Y by 2x!\n");
824 	      yt_max_nnz *= 2;
825 	      y_ptr = (REAL *) bss_malloc(yt_max_nnz*sizeof(REAL));
826 	      rvec_copy(y_ptr,y,yt_nnz);
827 	      bss_free(y);
828 	      y = y_ptr;
829 	      y_ptr+=yt_nnz;
830 	    }
831 	  yt_nnz += len;
832 	  rvec_copy(y_ptr,u+off,len);
833 
834           /* keep track of number of zeros */
835 	  if (dim)
836 	    {
837 	      for (k=0; k<len; k++)
838 		{
839 		  if (y_ptr[k]==0.0)
840 		    {yt_zero_nnz++;}
841 		}
842 	    }
843 	  else
844 	    {
845 	      for (k=0; k<len; k++)
846 		{
847 		  if (y_ptr[k]==0.0)
848 		    {yt_zero_nnz_0++;}
849 		}
850 	    }
851 	  ycol_indices[2*i] = off;
852 	  ycol_sz[i] = ycol_indices[2*i+1] = len;
853 	  ycol_vals[i] = y_ptr;
854 	}
855       else
856 	{
857 	  ycol_indices[2*i] = 0;
858 	  ycol_sz[i] = ycol_indices[2*i+1] = 0;
859 	  ycol_vals[i] = y_ptr;
860 	}
861     }
862 
863   /* close off stages for execution phase */
864   while (dim!=level)
865     {
866       stages[dim++]=i;
867       error_msg_warning("disconnected!!! dim(%d)!=level(%d)\n",dim,level);
868     }
869   stages[dim]=i;
870 
871   xyt_handle->info->n=xyt_handle->mvi->n;
872   xyt_handle->info->m=m;
873   xyt_handle->info->nnz=xt_nnz + yt_nnz;
874   xyt_handle->info->max_nnz=xt_max_nnz + yt_max_nnz;
875   xyt_handle->info->msg_buf_sz=stages[level]-stages[0];
876   xyt_handle->info->solve_uu = (REAL *) bss_malloc(m*sizeof(REAL));
877   xyt_handle->info->solve_w  = (REAL *) bss_malloc(m*sizeof(REAL));
878   xyt_handle->info->x=x;
879   xyt_handle->info->xcol_vals=xcol_vals;
880   xyt_handle->info->xcol_sz=xcol_sz;
881   xyt_handle->info->xcol_indices=xcol_indices;
882   xyt_handle->info->stages=stages;
883   xyt_handle->info->y=y;
884   xyt_handle->info->ycol_vals=ycol_vals;
885   xyt_handle->info->ycol_sz=ycol_sz;
886   xyt_handle->info->ycol_indices=ycol_indices;
887 
888   bss_free(segs);
889   bss_free(u);
890   bss_free(v);
891   bss_free(uu);
892   bss_free(z);
893   bss_free(w);
894 
895 #ifdef DEBUG
896   error_msg_warning("xyt_generate() :: end\n");
897 #endif
898 
899   return(0);
900 }
901 
902 
903 /*************************************xyt.c************************************
904 Function:
905 
906 Input :
907 Output:
908 Return:
909 Description:
910 **************************************xyt.c***********************************/
911 static
912 void
913 do_xyt_solve(xyt_ADT xyt_handle, register REAL *uc)
914 {
915   register int off, len, *iptr;
916   int level       =xyt_handle->level;
917   int n           =xyt_handle->info->n;
918   int m           =xyt_handle->info->m;
919   int *stages     =xyt_handle->info->stages;
920   int *xcol_indices=xyt_handle->info->xcol_indices;
921   int *ycol_indices=xyt_handle->info->ycol_indices;
922   register REAL *x_ptr, *y_ptr, *uu_ptr;
923 #if   BLAS||CBLAS
924   REAL zero=0.0;
925 #endif
926   REAL *solve_uu=xyt_handle->info->solve_uu;
927   REAL *solve_w =xyt_handle->info->solve_w;
928   REAL *x       =xyt_handle->info->x;
929   REAL *y       =xyt_handle->info->y;
930 
931 #ifdef DEBUG
932   error_msg_warning("do_xyt_solve() :: begin\n");
933 #endif
934 
935   uu_ptr=solve_uu;
936 #if   BLAS||CBLAS
937   copy(m,&zero,0,uu_ptr,1);
938 #else
939   rvec_zero(uu_ptr,m);
940 #endif
941 
942   /* x  = X.Y^T.b */
943   /* uu = Y^T.b */
944   for (y_ptr=y,iptr=ycol_indices; *iptr!=-1; y_ptr+=len)
945     {
946       off=*iptr++; len=*iptr++;
947 #if   BLAS||CBLAS
948       *uu_ptr++ = dot(len,uc+off,1,y_ptr,1);
949 #else
950       *uu_ptr++ = rvec_dot(uc+off,y_ptr,len);
951 #endif
952     }
953 
954   /* comunication of beta */
955   uu_ptr=solve_uu;
956   if (level) {ssgl_radd(uu_ptr, solve_w, level, stages);}
957 
958 #if   BLAS&&CBLAS
959   copy(n,&zero,0,uc,1);
960 #else
961   rvec_zero(uc,n);
962 #endif
963 
964   /* x = X.uu */
965   for (x_ptr=x,iptr=xcol_indices; *iptr!=-1; x_ptr+=len)
966     {
967       off=*iptr++; len=*iptr++;
968 #if   BLAS&&CBLAS
969       axpy(len,*uu_ptr++,x_ptr,1,uc+off,1);
970 #else
971       rvec_axpy(uc+off,x_ptr,*uu_ptr++,len);
972 #endif
973     }
974 
975 #ifdef DEBUG
976   error_msg_warning("do_xyt_solve() :: end\n");
977 #endif
978 }
979 
980 
981 /*************************************Xyt.c************************************
982 Function: check_init
983 
984 Input :
985 Output:
986 Return:
987 Description:
988 **************************************xyt.c***********************************/
989 static
990 void
991 check_init(void)
992 {
993 #ifdef DEBUG
994   error_msg_warning("check_init() :: start %d\n",n_xyt_handles);
995 #endif
996 
997   comm_init();
998   /*
999   perm_init();
1000   bss_init();
1001   */
1002 
1003 #ifdef DEBUG
1004   error_msg_warning("check_init() :: end   %d\n",n_xyt_handles);
1005 #endif
1006 }
1007 
1008 
1009 /*************************************xyt.c************************************
1010 Function: check_handle()
1011 
1012 Input :
1013 Output:
1014 Return:
1015 Description:
1016 **************************************xyt.c***********************************/
1017 static
1018 void
1019 check_handle(xyt_ADT xyt_handle)
1020 {
1021 #ifdef SAFE
1022   int vals[2], work[2], op[] = {NON_UNIFORM,GL_MIN,GL_MAX};
1023 #endif
1024 
1025 
1026 #ifdef DEBUG
1027   error_msg_warning("check_handle() :: start %d\n",n_xyt_handles);
1028 #endif
1029 
1030   if (xyt_handle==NULL)
1031     {error_msg_fatal("check_handle() :: bad handle :: NULL %d\n",xyt_handle);}
1032 
1033 #ifdef SAFE
1034   vals[0]=vals[1]=xyt_handle->id;
1035   giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);
1036   if ((vals[0]!=vals[1])||(xyt_handle->id<=0))
1037     {error_msg_fatal("check_handle() :: bad handle :: id mismatch min/max %d/%d %d\n",
1038 		     vals[0],vals[1], xyt_handle->id);}
1039 #endif
1040 
1041 #ifdef DEBUG
1042   error_msg_warning("check_handle() :: end   %d\n",n_xyt_handles);
1043 #endif
1044 }
1045 
1046 
1047 /*************************************xyt.c************************************
1048 Function: det_separators
1049 
1050 Input :
1051 Output:
1052 Return:
1053 Description:
1054   det_separators(xyt_handle, local2global, n, m, mylocmatvec, grid_data);
1055 **************************************xyt.c***********************************/
1056 static
1057 void
1058 det_separators(xyt_ADT xyt_handle)
1059 {
1060   int i, ct, id;
1061   int mask, edge, *iptr;
1062   int *dir, *used;
1063   int sum[4], w[4];
1064   REAL rsum[4], rw[4];
1065   int op[] = {GL_ADD,0};
1066   REAL *lhs, *rhs;
1067   int *nsep, *lnsep, *fo, nfo=0;
1068   gs_ADT gs_handle=xyt_handle->mvi->gs_handle;
1069   int *local2global=xyt_handle->mvi->local2global;
1070   int  n=xyt_handle->mvi->n;
1071   int  m=xyt_handle->mvi->m;
1072   int level=xyt_handle->level;
1073   int shared=FALSE;
1074 
1075 #ifdef DEBUG
1076   error_msg_warning("det_separators() :: start %d %d %d\n",level,n,m);
1077 #endif
1078 
1079   dir  = (int*)bss_malloc(INT_LEN*(level+1));
1080   nsep = (int*)bss_malloc(INT_LEN*(level+1));
1081   lnsep= (int*)bss_malloc(INT_LEN*(level+1));
1082   fo   = (int*)bss_malloc(INT_LEN*(n+1));
1083   used = (int*)bss_malloc(INT_LEN*n);
1084 
1085   ivec_zero(dir  ,level+1);
1086   ivec_zero(nsep ,level+1);
1087   ivec_zero(lnsep,level+1);
1088   ivec_set (fo   ,-1,n+1);
1089   ivec_zero(used,n);
1090 
1091   lhs  = (double*)bss_malloc(REAL_LEN*m);
1092   rhs  = (double*)bss_malloc(REAL_LEN*m);
1093 
1094   /* determine the # of unique dof */
1095   rvec_zero(lhs,m);
1096   rvec_set(lhs,1.0,n);
1097   gs_gop_hc(gs_handle,lhs,"+\0",level);
1098   error_msg_warning("done first gs_gop_hc\n");
1099   rvec_zero(rsum,2);
1100   for (ct=i=0;i<n;i++)
1101     {
1102       if (lhs[i]!=0.0)
1103 	{rsum[0]+=1.0/lhs[i]; rsum[1]+=lhs[i];}
1104 
1105       if (lhs[i]!=1.0)
1106 	{
1107           shared=TRUE;
1108         }
1109     }
1110 
1111   grop_hc(rsum,rw,2,op,level);
1112   rsum[0]+=0.1;
1113   rsum[1]+=0.1;
1114 
1115   /*
1116       if (!my_id)
1117       {
1118       printf("xyt n unique = %d (%g)\n",(int) rsum[0], rsum[0]);
1119       printf("xyt n shared = %d (%g)\n",(int) rsum[1], rsum[1]);
1120       }
1121   */
1122 
1123   xyt_handle->info->n_global=xyt_handle->info->m_global=(int) rsum[0];
1124   xyt_handle->mvi->n_global =xyt_handle->mvi->m_global =(int) rsum[0];
1125 
1126   /* determine separator sets top down */
1127   if (shared)
1128     {
1129       /* solution is to do as in the symmetric shared case but then */
1130       /* pick the sub-hc with the most free dofs and do a mat-vec   */
1131       /* and pick up the responses on the other sub-hc from the     */
1132       /* initial separator set obtained from the symm. shared case  */
1133       error_msg_fatal("shared dof separator determination not ready ... see hmt!!!\n");
1134       for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1)
1135 	{
1136 	  /* set rsh of hc, fire, and collect lhs responses */
1137 	  (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m);
1138 	  gs_gop_hc(gs_handle,lhs,"+\0",edge);
1139 
1140 	  /* set lsh of hc, fire, and collect rhs responses */
1141 	  (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m);
1142 	  gs_gop_hc(gs_handle,rhs,"+\0",edge);
1143 
1144 	  for (i=0;i<n;i++)
1145 	    {
1146 	      if (id< mask)
1147 		{
1148 		  if (lhs[i]!=0.0)
1149 		    {lhs[i]=1.0;}
1150 		}
1151 	      if (id>=mask)
1152 		{
1153 		  if (rhs[i]!=0.0)
1154 		    {rhs[i]=1.0;}
1155 		}
1156 	    }
1157 
1158 	  if (id< mask)
1159 	    {gs_gop_hc(gs_handle,lhs,"+\0",edge-1);}
1160 	  else
1161 	    {gs_gop_hc(gs_handle,rhs,"+\0",edge-1);}
1162 
1163 	  /* count number of dofs I own that have signal and not in sep set */
1164 	  rvec_zero(rsum,4);
1165 	  for (ivec_zero(sum,4),ct=i=0;i<n;i++)
1166 	    {
1167 	      if (!used[i])
1168 		{
1169 		  /* number of unmarked dofs on node */
1170 		  ct++;
1171 		  /* number of dofs to be marked on lhs hc */
1172 		  if (id< mask)
1173 		    {
1174 		      if (lhs[i]!=0.0)
1175 			{sum[0]++; rsum[0]+=1.0/lhs[i];}
1176 		    }
1177 		  /* number of dofs to be marked on rhs hc */
1178 		  if (id>=mask)
1179 		    {
1180 		      if (rhs[i]!=0.0)
1181 			{sum[1]++; rsum[1]+=1.0/rhs[i];}
1182 		    }
1183 		}
1184 	    }
1185 
1186 	  /* go for load balance - choose half with most unmarked dofs, bias LHS */
1187 	  (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
1188 	  (id<mask) ? (rsum[2]=ct) : (rsum[3]=ct);
1189 	  giop_hc(sum,w,4,op,edge);
1190 	  grop_hc(rsum,rw,4,op,edge);
1191 	  rsum[0]+=0.1; rsum[1]+=0.1; rsum[2]+=0.1; rsum[3]+=0.1;
1192 
1193 	  if (id<mask)
1194 	    {
1195 	      /* mark dofs I own that have signal and not in sep set */
1196 	      for (ct=i=0;i<n;i++)
1197 		{
1198 		  if ((!used[i])&&(lhs[i]!=0.0))
1199 		    {
1200 		      ct++; nfo++;
1201 
1202 		      if (nfo>n)
1203 			{error_msg_fatal("nfo about to exceed n\n");}
1204 
1205 		      *--iptr = local2global[i];
1206 		      used[i]=edge;
1207 		    }
1208 		}
1209 	      if (ct>1) {ivec_sort(iptr,ct);}
1210 
1211 	      lnsep[edge]=ct;
1212 	      nsep[edge]=(int) rsum[0];
1213 	      dir [edge]=LEFT;
1214 	    }
1215 
1216 	  if (id>=mask)
1217 	    {
1218 	      /* mark dofs I own that have signal and not in sep set */
1219 	      for (ct=i=0;i<n;i++)
1220 		{
1221 		  if ((!used[i])&&(rhs[i]!=0.0))
1222 		    {
1223 		      ct++; nfo++;
1224 
1225 		      if (nfo>n)
1226 			{error_msg_fatal("nfo about to exceed n\n");}
1227 
1228 		      *--iptr = local2global[i];
1229 		      used[i]=edge;
1230 		    }
1231 		}
1232 	      if (ct>1) {ivec_sort(iptr,ct);}
1233 
1234 	      lnsep[edge]=ct;
1235 	      nsep[edge]= (int) rsum[1];
1236 	      dir [edge]=RIGHT;
1237 	    }
1238 
1239 	  /* LATER or we can recur on these to order seps at this level */
1240 	  /* do we need full set of separators for this?                */
1241 
1242 	  /* fold rhs hc into lower */
1243 	  if (id>=mask)
1244 	    {id-=mask;}
1245 	}
1246     }
1247   else
1248     {
1249       for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1)
1250 	{
1251 	  /* set rsh of hc, fire, and collect lhs responses */
1252 	  (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m);
1253 	  gs_gop_hc(gs_handle,lhs,"+\0",edge);
1254 
1255 	  /* set lsh of hc, fire, and collect rhs responses */
1256 	  (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m);
1257 	  gs_gop_hc(gs_handle,rhs,"+\0",edge);
1258 
1259 	  /* count number of dofs I own that have signal and not in sep set */
1260 	  for (ivec_zero(sum,4),ct=i=0;i<n;i++)
1261 	    {
1262 	      if (!used[i])
1263 		{
1264 		  /* number of unmarked dofs on node */
1265 		  ct++;
1266 		  /* number of dofs to be marked on lhs hc */
1267 		  if ((id< mask)&&(lhs[i]!=0.0)) {sum[0]++;}
1268 		  /* number of dofs to be marked on rhs hc */
1269 		  if ((id>=mask)&&(rhs[i]!=0.0)) {sum[1]++;}
1270 		}
1271 	    }
1272 
1273 	  /* for the non-symmetric case we need separators of width 2 */
1274 	  /* so take both sides */
1275 	  (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
1276 	  giop_hc(sum,w,4,op,edge);
1277 
1278 	  ct=0;
1279 	  if (id<mask)
1280 	    {
1281 	      /* mark dofs I own that have signal and not in sep set */
1282 	      for (i=0;i<n;i++)
1283 		{
1284 		  if ((!used[i])&&(lhs[i]!=0.0))
1285 		    {
1286 		      ct++; nfo++;
1287 		      *--iptr = local2global[i];
1288 		      used[i]=edge;
1289 		    }
1290 		}
1291 	      /* LSH hc summation of ct should be sum[0] */
1292 	    }
1293 	  else
1294 	    {
1295 	      /* mark dofs I own that have signal and not in sep set */
1296 	      for (i=0;i<n;i++)
1297 		{
1298 		  if ((!used[i])&&(rhs[i]!=0.0))
1299 		    {
1300 		      ct++; nfo++;
1301 		      *--iptr = local2global[i];
1302 		      used[i]=edge;
1303 		    }
1304 		}
1305 	      /* RSH hc summation of ct should be sum[1] */
1306 	    }
1307 
1308 	  if (ct>1) {ivec_sort(iptr,ct);}
1309 	  lnsep[edge]=ct;
1310 	  nsep[edge]=sum[0]+sum[1];
1311 	  dir [edge]=BOTH;
1312 
1313 	  /* LATER or we can recur on these to order seps at this level */
1314 	  /* do we need full set of separators for this?                */
1315 
1316 	  /* fold rhs hc into lower */
1317 	  if (id>=mask)
1318 	    {id-=mask;}
1319 	}
1320     }
1321 
1322   /* level 0 is on processor case - so mark the remainder */
1323   for (ct=i=0;i<n;i++)
1324     {
1325       if (!used[i])
1326 	{
1327 	  ct++; nfo++;
1328 	  *--iptr = local2global[i];
1329 	  used[i]=edge;
1330 	}
1331     }
1332   if (ct>1) {ivec_sort(iptr,ct);}
1333   lnsep[edge]=ct;
1334   nsep [edge]=ct;
1335   dir  [edge]=BOTH;
1336 
1337   xyt_handle->info->nsep=nsep;
1338   xyt_handle->info->lnsep=lnsep;
1339   xyt_handle->info->fo=fo;
1340   xyt_handle->info->nfo=nfo;
1341 
1342   bss_free(dir);
1343   bss_free(lhs);
1344   bss_free(rhs);
1345   bss_free(used);
1346 
1347 #ifdef DEBUG
1348   error_msg_warning("det_separators() :: end\n");
1349 #endif
1350 }
1351 
1352 
1353 /*************************************xyt.c************************************
1354 Function: set_mvi
1355 
1356 Input :
1357 Output:
1358 Return:
1359 Description:
1360 **************************************xyt.c***********************************/
1361 static
1362 mv_info *set_mvi(int *local2global, int n, int m, void *matvec, void *grid_data)
1363 {
1364   mv_info *mvi;
1365 
1366 
1367 #ifdef DEBUG
1368   error_msg_warning("set_mvi() :: start\n");
1369 #endif
1370 
1371   mvi = (mv_info*)bss_malloc(sizeof(mv_info));
1372   mvi->n=n;
1373   mvi->m=m;
1374   mvi->n_global=-1;
1375   mvi->m_global=-1;
1376   mvi->local2global=(int*)bss_malloc((m+1)*INT_LEN);
1377   ivec_copy(mvi->local2global,local2global,m);
1378   mvi->local2global[m] = INT_MAX;
1379   mvi->matvec=(PetscErrorCode (*)(mv_info*,REAL*,REAL*))matvec;
1380   mvi->grid_data=grid_data;
1381 
1382   /* set xyt communication handle to perform restricted matvec */
1383   mvi->gs_handle = gs_init(local2global, m, num_nodes);
1384 
1385 #ifdef DEBUG
1386   error_msg_warning("set_mvi() :: end   \n");
1387 #endif
1388 
1389   return(mvi);
1390 }
1391 
1392 
1393 /*************************************xyt.c************************************
1394 Function: set_mvi
1395 
1396 Input :
1397 Output:
1398 Return:
1399 Description:
1400 
1401       computes u = A.v
1402       do_matvec(xyt_handle->mvi,v,u);
1403 **************************************xyt.c***********************************/
1404 static
1405 void do_matvec(mv_info *A, REAL *v, REAL *u)
1406 {
1407   A->matvec((mv_info*)A->grid_data,v,u);
1408 }
1409 
1410 
1411 
1412