xref: /petsc/src/dm/impls/swarm/data_ex.c (revision 095059a4a9da3c077576069a1cce327c884bb767)
1*095059a4SDave May /*
2*095059a4SDave May Build a few basic tools to help with partitioned domains.
3*095059a4SDave May 
4*095059a4SDave May 1)
5*095059a4SDave May On each processor, have a DomainExchangerTopology.
6*095059a4SDave May This is a doubly-connected edge list which enumerates the
7*095059a4SDave May communication paths between connected processors. By numbering
8*095059a4SDave May these paths we can always uniquely assign message identifers.
9*095059a4SDave May 
10*095059a4SDave May         edge
11*095059a4SDave May          10
12*095059a4SDave May proc  --------->  proc
13*095059a4SDave May  0    <--------    1
14*095059a4SDave May          11
15*095059a4SDave May         twin
16*095059a4SDave May 
17*095059a4SDave May Eg: Proc 0 send to proc 1 with message id is 10. To recieve the correct
18*095059a4SDave May message, proc 1 looks for the edge connected to proc 0, and then the
19*095059a4SDave May messgae id comes from the twin of that edge
20*095059a4SDave May 
21*095059a4SDave May 2)
22*095059a4SDave May A DomainExchangerArrayPacker.
23*095059a4SDave May A little function which given a piece of data, will memcpy the data into
24*095059a4SDave May an array (which will be sent to procs) into the correct place.
25*095059a4SDave May 
26*095059a4SDave May On Proc 1 we sent data to procs 0,2,3. The data is on different lengths.
27*095059a4SDave May All data gets jammed into single array. Need to "jam" data into correct locations
28*095059a4SDave May The Packer knows how much is to going to each processor and keeps track of the inserts
29*095059a4SDave May so as to avoid ever packing TOO much into one slot, and inevatbly corrupting some memory
30*095059a4SDave May 
31*095059a4SDave May data to 0    data to 2       data to 3
32*095059a4SDave May 
33*095059a4SDave May |--------|-----------------|--|
34*095059a4SDave May 
35*095059a4SDave May 
36*095059a4SDave May User has to unpack message themselves. I can get you the pointer for each i
37*095059a4SDave May entry, but you'll have to cast it to the appropriate data type.
38*095059a4SDave May 
39*095059a4SDave May 
40*095059a4SDave May 
41*095059a4SDave May 
42*095059a4SDave May Phase A: Build topology
43*095059a4SDave May 
44*095059a4SDave May Phase B: Define message lengths
45*095059a4SDave May 
46*095059a4SDave May Phase C: Pack data
47*095059a4SDave May 
48*095059a4SDave May Phase D: Send data
49*095059a4SDave May 
50*095059a4SDave May //
51*095059a4SDave May DataExCreate()
52*095059a4SDave May // A
53*095059a4SDave May DataExTopologyInitialize()
54*095059a4SDave May DataExTopologyAddNeighbour()
55*095059a4SDave May DataExTopologyAddNeighbour()
56*095059a4SDave May DataExTopologyFinalize()
57*095059a4SDave May // B
58*095059a4SDave May DataExZeroAllSendCount()
59*095059a4SDave May DataExAddToSendCount()
60*095059a4SDave May DataExAddToSendCount()
61*095059a4SDave May DataExAddToSendCount()
62*095059a4SDave May // C
63*095059a4SDave May DataExPackInitialize()
64*095059a4SDave May DataExPackData()
65*095059a4SDave May DataExPackData()
66*095059a4SDave May DataExPackFinalize()
67*095059a4SDave May // D
68*095059a4SDave May DataExBegin()
69*095059a4SDave May // ... perform any calculations ... ///
70*095059a4SDave May DataExEnd()
71*095059a4SDave May 
72*095059a4SDave May // Call any getters //
73*095059a4SDave May 
74*095059a4SDave May 
75*095059a4SDave May */
76*095059a4SDave May 
77*095059a4SDave May #include <petsc.h>
78*095059a4SDave May #include <petscvec.h>
79*095059a4SDave May #include <petscmat.h>
80*095059a4SDave May 
81*095059a4SDave May #include "data_ex.h"
82*095059a4SDave May 
83*095059a4SDave May const char *status_names[] = { "initialized", "finalized", "unknown" };
84*095059a4SDave May 
85*095059a4SDave May PetscLogEvent PTATIN_DataExchangerTopologySetup;
86*095059a4SDave May PetscLogEvent PTATIN_DataExchangerBegin;
87*095059a4SDave May PetscLogEvent PTATIN_DataExchangerEnd;
88*095059a4SDave May 
89*095059a4SDave May 
90*095059a4SDave May #undef __FUNCT__
91*095059a4SDave May #define __FUNCT__ "DataExCreate"
92*095059a4SDave May DataEx DataExCreate(MPI_Comm comm,const PetscInt count)
93*095059a4SDave May {
94*095059a4SDave May 	DataEx         d;
95*095059a4SDave May 	PetscErrorCode ierr;
96*095059a4SDave May 
97*095059a4SDave May 	d = (DataEx)malloc( sizeof(struct _p_DataEx) );
98*095059a4SDave May 	memset( d, 0, sizeof(struct _p_DataEx) );
99*095059a4SDave May 
100*095059a4SDave May 	ierr = MPI_Comm_dup(comm,&d->comm);
101*095059a4SDave May 	ierr = MPI_Comm_rank(d->comm,&d->rank);
102*095059a4SDave May 
103*095059a4SDave May 	d->instance = count;
104*095059a4SDave May 
105*095059a4SDave May 	d->topology_status        = DEOBJECT_STATE_UNKNOWN;
106*095059a4SDave May 	d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
107*095059a4SDave May 	d->packer_status          = DEOBJECT_STATE_UNKNOWN;
108*095059a4SDave May 	d->communication_status   = DEOBJECT_STATE_UNKNOWN;
109*095059a4SDave May 
110*095059a4SDave May 	d->n_neighbour_procs = -1;
111*095059a4SDave May 	d->neighbour_procs   = NULL;
112*095059a4SDave May 
113*095059a4SDave May 	d->messages_to_be_sent      = NULL;
114*095059a4SDave May 	d->message_offsets          = NULL;
115*095059a4SDave May 	d->messages_to_be_recvieved = NULL;
116*095059a4SDave May 
117*095059a4SDave May 	d->unit_message_size   = -1;
118*095059a4SDave May 	d->send_message        = NULL;
119*095059a4SDave May 	d->send_message_length = -1;
120*095059a4SDave May 	d->recv_message        = NULL;
121*095059a4SDave May 	d->recv_message_length = -1;
122*095059a4SDave May 	d->total_pack_cnt      = -1;
123*095059a4SDave May 	d->pack_cnt            = NULL;
124*095059a4SDave May 
125*095059a4SDave May 	d->send_tags = NULL;
126*095059a4SDave May 	d->recv_tags = NULL;
127*095059a4SDave May 
128*095059a4SDave May 	d->_stats    = NULL;
129*095059a4SDave May 	d->_requests = NULL;
130*095059a4SDave May 
131*095059a4SDave May 	return d;
132*095059a4SDave May }
133*095059a4SDave May 
134*095059a4SDave May #undef __FUNCT__
135*095059a4SDave May #define __FUNCT__ "DataExView"
136*095059a4SDave May PetscErrorCode DataExView(DataEx d)
137*095059a4SDave May {
138*095059a4SDave May 	PetscMPIInt p;
139*095059a4SDave May 
140*095059a4SDave May 
141*095059a4SDave May 	PetscFunctionBegin;
142*095059a4SDave May 	PetscPrintf( PETSC_COMM_WORLD, "DataEx: instance=%D\n",d->instance);
143*095059a4SDave May 
144*095059a4SDave May 	PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]);
145*095059a4SDave May 	PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status] );
146*095059a4SDave May 	PetscPrintf( PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status] );
147*095059a4SDave May 	PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status] );
148*095059a4SDave May 
149*095059a4SDave May 	if (d->topology_status == DEOBJECT_FINALIZED) {
150*095059a4SDave May 		PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");
151*095059a4SDave May 		PetscPrintf( PETSC_COMM_SELF, "    [%d] neighbours: %d \n", (int)d->rank, (int)d->n_neighbour_procs );
152*095059a4SDave May 		for (p=0; p<d->n_neighbour_procs; p++) {
153*095059a4SDave May 			PetscPrintf( PETSC_COMM_SELF, "    [%d]   neighbour[%D] = %d \n", (int)d->rank, p, (int)d->neighbour_procs[p]);
154*095059a4SDave May 		}
155*095059a4SDave May 	}
156*095059a4SDave May 
157*095059a4SDave May 	if (d->message_lengths_status == DEOBJECT_FINALIZED) {
158*095059a4SDave May 		PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");
159*095059a4SDave May 		PetscPrintf( PETSC_COMM_SELF, "    [%d] atomic size: %ld \n", (int)d->rank, (long int)d->unit_message_size );
160*095059a4SDave May 		for (p=0; p<d->n_neighbour_procs; p++) {
161*095059a4SDave May 			PetscPrintf( PETSC_COMM_SELF, "    [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", (int)d->rank, d->messages_to_be_sent[p], d->send_tags[p], (int)d->neighbour_procs[p] );
162*095059a4SDave May 		}
163*095059a4SDave May 		for (p=0; p<d->n_neighbour_procs; p++) {
164*095059a4SDave May 			PetscPrintf( PETSC_COMM_SELF, "    [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", (int)d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], (int)d->neighbour_procs[p] );
165*095059a4SDave May 		}
166*095059a4SDave May 	}
167*095059a4SDave May 
168*095059a4SDave May 	if (d->packer_status == DEOBJECT_FINALIZED) {
169*095059a4SDave May 
170*095059a4SDave May 	}
171*095059a4SDave May 
172*095059a4SDave May 	if (d->communication_status == DEOBJECT_FINALIZED) {
173*095059a4SDave May 
174*095059a4SDave May 	}
175*095059a4SDave May 
176*095059a4SDave May 	PetscFunctionReturn(0);
177*095059a4SDave May }
178*095059a4SDave May 
179*095059a4SDave May #undef __FUNCT__
180*095059a4SDave May #define __FUNCT__ "DataExDestroy"
181*095059a4SDave May PetscErrorCode DataExDestroy(DataEx d)
182*095059a4SDave May {
183*095059a4SDave May 	PetscErrorCode ierr;
184*095059a4SDave May 
185*095059a4SDave May 	PetscFunctionBegin;
186*095059a4SDave May 	ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr);
187*095059a4SDave May 
188*095059a4SDave May 	if (d->neighbour_procs != NULL) {
189*095059a4SDave May 		free(d->neighbour_procs);
190*095059a4SDave May 	}
191*095059a4SDave May 
192*095059a4SDave May 	if (d->messages_to_be_sent != NULL) {
193*095059a4SDave May 		free(d->messages_to_be_sent);
194*095059a4SDave May 	}
195*095059a4SDave May 
196*095059a4SDave May 	if (d->message_offsets != NULL) {
197*095059a4SDave May 		free(d->message_offsets);
198*095059a4SDave May 	}
199*095059a4SDave May 
200*095059a4SDave May 	if (d->messages_to_be_recvieved != NULL) {
201*095059a4SDave May 		free(d->messages_to_be_recvieved);
202*095059a4SDave May 	}
203*095059a4SDave May 
204*095059a4SDave May 	if (d->send_message != NULL) {
205*095059a4SDave May 		free(d->send_message);
206*095059a4SDave May 	}
207*095059a4SDave May 
208*095059a4SDave May 	if (d->recv_message != NULL) {
209*095059a4SDave May 		free(d->recv_message);
210*095059a4SDave May 	}
211*095059a4SDave May 
212*095059a4SDave May 	if (d->pack_cnt != NULL) {
213*095059a4SDave May 		free(d->pack_cnt);
214*095059a4SDave May 	}
215*095059a4SDave May 
216*095059a4SDave May 	if (d->send_tags != NULL) {
217*095059a4SDave May 		free(d->send_tags);
218*095059a4SDave May 	}
219*095059a4SDave May 	if (d->recv_tags != NULL) {
220*095059a4SDave May 		free(d->recv_tags);
221*095059a4SDave May 	}
222*095059a4SDave May 
223*095059a4SDave May 	if (d->_stats != NULL) {
224*095059a4SDave May 		free(d->_stats);
225*095059a4SDave May 	}
226*095059a4SDave May 	if (d->_requests != NULL) {
227*095059a4SDave May 		free(d->_requests);
228*095059a4SDave May 	}
229*095059a4SDave May 
230*095059a4SDave May 	free(d);
231*095059a4SDave May 
232*095059a4SDave May 	PetscFunctionReturn(0);
233*095059a4SDave May }
234*095059a4SDave May 
235*095059a4SDave May /* === Phase A === */
236*095059a4SDave May 
237*095059a4SDave May #undef __FUNCT__
238*095059a4SDave May #define __FUNCT__ "DataExTopologyInitialize"
239*095059a4SDave May PetscErrorCode DataExTopologyInitialize(DataEx d)
240*095059a4SDave May {
241*095059a4SDave May 	PetscFunctionBegin;
242*095059a4SDave May 	d->topology_status = DEOBJECT_INITIALIZED;
243*095059a4SDave May 
244*095059a4SDave May 	d->n_neighbour_procs = 0;
245*095059a4SDave May 	if (d->neighbour_procs          != NULL)  {  free(d->neighbour_procs);            d->neighbour_procs          = NULL;  }
246*095059a4SDave May 	if (d->messages_to_be_sent      != NULL)  {  free(d->messages_to_be_sent);        d->messages_to_be_sent      = NULL;  }
247*095059a4SDave May 	if (d->message_offsets          != NULL)  {  free(d->message_offsets);            d->message_offsets          = NULL;  }
248*095059a4SDave May 	if (d->messages_to_be_recvieved != NULL)  {  free(d->messages_to_be_recvieved);   d->messages_to_be_recvieved = NULL;  }
249*095059a4SDave May 	if (d->pack_cnt                 != NULL)  {  free(d->pack_cnt);                   d->pack_cnt                 = NULL;  }
250*095059a4SDave May 
251*095059a4SDave May 	if (d->send_tags != NULL)                 {  free(d->send_tags);                  d->send_tags = NULL;  }
252*095059a4SDave May 	if (d->recv_tags != NULL)                 {  free(d->recv_tags);                  d->recv_tags = NULL;  }
253*095059a4SDave May 
254*095059a4SDave May 	PetscFunctionReturn(0);
255*095059a4SDave May }
256*095059a4SDave May 
257*095059a4SDave May #undef __FUNCT__
258*095059a4SDave May #define __FUNCT__ "DataExTopologyAddNeighbour"
259*095059a4SDave May PetscErrorCode DataExTopologyAddNeighbour(DataEx d,const PetscMPIInt proc_id)
260*095059a4SDave May {
261*095059a4SDave May 	PetscMPIInt    n,found;
262*095059a4SDave May 	PetscMPIInt    nproc;
263*095059a4SDave May 	PetscErrorCode ierr;
264*095059a4SDave May 
265*095059a4SDave May 
266*095059a4SDave May 	PetscFunctionBegin;
267*095059a4SDave May 	if (d->topology_status == DEOBJECT_FINALIZED) {
268*095059a4SDave May 		SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DataExTopologyInitialize() first" );
269*095059a4SDave May 	}
270*095059a4SDave May 	else if (d->topology_status != DEOBJECT_INITIALIZED) {
271*095059a4SDave May 		SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first" );
272*095059a4SDave May 	}
273*095059a4SDave May 
274*095059a4SDave May 	/* error on negative entries */
275*095059a4SDave May 	if (proc_id < 0) {
276*095059a4SDave May 		SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
277*095059a4SDave May 	}
278*095059a4SDave May 	/* error on ranks larger than number of procs in communicator */
279*095059a4SDave May 	ierr = MPI_Comm_size(d->comm,&nproc);CHKERRQ(ierr);
280*095059a4SDave May 	if (proc_id >= nproc) {
281*095059a4SDave May 		SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank >= nproc");
282*095059a4SDave May 	}
283*095059a4SDave May 
284*095059a4SDave May 	if (d->n_neighbour_procs == 0) {
285*095059a4SDave May 		d->neighbour_procs = (PetscMPIInt*)malloc( sizeof(PetscMPIInt) );
286*095059a4SDave May 	}
287*095059a4SDave May 
288*095059a4SDave May 	/* check for proc_id */
289*095059a4SDave May 	found = 0;
290*095059a4SDave May 	for (n=0; n<d->n_neighbour_procs; n++) {
291*095059a4SDave May 		if (d->neighbour_procs[n] == proc_id) {
292*095059a4SDave May 			found  = 1;
293*095059a4SDave May 		}
294*095059a4SDave May 	}
295*095059a4SDave May 	if (found == 0) { /* add it to list */
296*095059a4SDave May 		PetscMPIInt *tmp;
297*095059a4SDave May 
298*095059a4SDave May 		tmp = (PetscMPIInt*)realloc( d->neighbour_procs, sizeof(PetscMPIInt)*(d->n_neighbour_procs+1) );
299*095059a4SDave May 		d->neighbour_procs = tmp;
300*095059a4SDave May 
301*095059a4SDave May 		d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
302*095059a4SDave May 		d->n_neighbour_procs++;
303*095059a4SDave May 	}
304*095059a4SDave May 
305*095059a4SDave May 	PetscFunctionReturn(0);
306*095059a4SDave May }
307*095059a4SDave May 
308*095059a4SDave May /*
309*095059a4SDave May counter: the index of the communication object
310*095059a4SDave May N: the number of processors
311*095059a4SDave May r0: rank of sender
312*095059a4SDave May r1: rank of receiver
313*095059a4SDave May 
314*095059a4SDave May procs = { 0, 1, 2, 3 }
315*095059a4SDave May 
316*095059a4SDave May 0 ==> 0		e=0
317*095059a4SDave May 0 ==> 1		e=1
318*095059a4SDave May 0 ==> 2		e=2
319*095059a4SDave May 0 ==> 3		e=3
320*095059a4SDave May 
321*095059a4SDave May 1 ==> 0		e=4
322*095059a4SDave May 1 ==> 1		e=5
323*095059a4SDave May 1 ==> 2		e=6
324*095059a4SDave May 1 ==> 3		e=7
325*095059a4SDave May 
326*095059a4SDave May 2 ==> 0		e=8
327*095059a4SDave May 2 ==> 1		e=9
328*095059a4SDave May 2 ==> 2		e=10
329*095059a4SDave May 2 ==> 3		e=11
330*095059a4SDave May 
331*095059a4SDave May 3 ==> 0		e=12
332*095059a4SDave May 3 ==> 1		e=13
333*095059a4SDave May 3 ==> 2		e=14
334*095059a4SDave May 3 ==> 3		e=15
335*095059a4SDave May 
336*095059a4SDave May If we require that proc A sends to proc B, then the SEND tag index will be given by
337*095059a4SDave May   N * rank(A) + rank(B) + offset
338*095059a4SDave May If we require that proc A will receive from proc B, then the RECV tag index will be given by
339*095059a4SDave May   N * rank(B) + rank(A) + offset
340*095059a4SDave May 
341*095059a4SDave May */
342*095059a4SDave May void _get_tags( PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt )
343*095059a4SDave May {
344*095059a4SDave May 	PetscMPIInt st,rt;
345*095059a4SDave May 
346*095059a4SDave May 
347*095059a4SDave May 	st = N*r0 + r1   +   N*N*counter;
348*095059a4SDave May 	rt = N*r1 + r0   +   N*N*counter;
349*095059a4SDave May 
350*095059a4SDave May 	*_st = st;
351*095059a4SDave May 	*_rt = rt;
352*095059a4SDave May }
353*095059a4SDave May 
354*095059a4SDave May /*
355*095059a4SDave May Makes the communication map symmetric
356*095059a4SDave May */
357*095059a4SDave May #undef __FUNCT__
358*095059a4SDave May #define __FUNCT__ "_DataExCompleteCommunicationMap"
359*095059a4SDave May PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
360*095059a4SDave May {
361*095059a4SDave May 	Mat               A,redA;
362*095059a4SDave May 	PetscInt          i,j,nc;
363*095059a4SDave May 	PetscInt          n_, *proc_neighbours_;
364*095059a4SDave May     PetscInt          rank_i_;
365*095059a4SDave May 	PetscMPIInt       size,  rank_i;
366*095059a4SDave May 	PetscInt          max_nnz;
367*095059a4SDave May 	PetscScalar       *vals;
368*095059a4SDave May 	const PetscInt    *cols;
369*095059a4SDave May 	const PetscScalar *red_vals;
370*095059a4SDave May 	PetscMPIInt       _n_new, *_proc_neighbours_new;
371*095059a4SDave May 	PetscBool         is_seqaij;
372*095059a4SDave May 	PetscLogDouble    t0,t1;
373*095059a4SDave May 	PetscErrorCode    ierr;
374*095059a4SDave May 
375*095059a4SDave May 
376*095059a4SDave May 	PetscFunctionBegin;
377*095059a4SDave May 	PetscPrintf(PETSC_COMM_WORLD,"************************** Starting _DataExCompleteCommunicationMap ************************** \n");
378*095059a4SDave May 	PetscTime(&t0);
379*095059a4SDave May 
380*095059a4SDave May 	n_ = n;
381*095059a4SDave May 	ierr = PetscMalloc( sizeof(PetscInt) * n_, &proc_neighbours_ );CHKERRQ(ierr);
382*095059a4SDave May 	for (i=0; i<n_; i++) {
383*095059a4SDave May 		proc_neighbours_[i] = proc_neighbours[i];
384*095059a4SDave May 	}
385*095059a4SDave May 
386*095059a4SDave May 	ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
387*095059a4SDave May 	ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr);
388*095059a4SDave May 	rank_i_ = rank_i;
389*095059a4SDave May 
390*095059a4SDave May 	ierr = MatCreate(comm,&A);CHKERRQ(ierr);
391*095059a4SDave May 	ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
392*095059a4SDave May 	ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
393*095059a4SDave May 
394*095059a4SDave May 	ierr = MPI_Allreduce(&n_,&max_nnz,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
395*095059a4SDave May 	ierr = MatSeqAIJSetPreallocation(A,n_,NULL);CHKERRQ(ierr);
396*095059a4SDave May 	PetscPrintf(PETSC_COMM_WORLD,"max_nnz = %D \n", max_nnz );
397*095059a4SDave May 	//printf("[%d]: nnz = %d \n", rank_i,n_ );
398*095059a4SDave May 	{
399*095059a4SDave May 		ierr = MatMPIAIJSetPreallocation(A,1,NULL,n_,NULL);CHKERRQ(ierr);
400*095059a4SDave May 	}
401*095059a4SDave May 
402*095059a4SDave May 
403*095059a4SDave May 	/* Build original map */
404*095059a4SDave May 	ierr = PetscMalloc( sizeof(PetscScalar)*n_, &vals );CHKERRQ(ierr);
405*095059a4SDave May 	for (i=0; i<n_; i++) {
406*095059a4SDave May 		vals[i] = 1.0;
407*095059a4SDave May 	}
408*095059a4SDave May 	ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr);
409*095059a4SDave May 
410*095059a4SDave May 	ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
411*095059a4SDave May 	ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
412*095059a4SDave May 
413*095059a4SDave May 	/* Now force all other connections if they are not already there */
414*095059a4SDave May 	/* It's more efficient to do them all at once */
415*095059a4SDave May 	for (i=0; i<n_; i++) {
416*095059a4SDave May 		vals[i] = 2.0;
417*095059a4SDave May 	}
418*095059a4SDave May 	ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr);
419*095059a4SDave May 
420*095059a4SDave May 	ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
421*095059a4SDave May 	ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
422*095059a4SDave May 
423*095059a4SDave May 	ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
424*095059a4SDave May 	ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
425*095059a4SDave May         ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
426*095059a4SDave May 
427*095059a4SDave May 	/*
428*095059a4SDave May 	What the fuck is this? Is this necessary at all?
429*095059a4SDave May 	Why cannot I just use MatGetRow on the single row living on the current rank??
430*095059a4SDave May 	Seems like a fine thing to do provided the operation MatGetRow() is supported for MATMPIAIJ
431*095059a4SDave May 	~ DAM, Feb 26, 2013 (using petsc 3.2)
432*095059a4SDave May 	*/
433*095059a4SDave May 	/* Duplicate the entire matrix on ALL cpu's */
434*095059a4SDave May 	/*
435*095059a4SDave May 	 MatGetRedundantMatrix is not supported for SEQAIJ, thus we
436*095059a4SDave May 	 fake a redundant matrix by setting equal to A. This enables the
437*095059a4SDave May 	 code to run on one cpu (even if this seems slightly odd).
438*095059a4SDave May 	 */
439*095059a4SDave May 	is_seqaij = PETSC_FALSE;
440*095059a4SDave May 	ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&is_seqaij);CHKERRQ(ierr);
441*095059a4SDave May //	if (is_seqaij==PETSC_FALSE) {
442*095059a4SDave May //		ierr = MatGetRedundantMatrix( A, size_, PETSC_COMM_SELF, size_, MAT_INITIAL_MATRIX, &redA );CHKERRQ(ierr);
443*095059a4SDave May //	} else {
444*095059a4SDave May 		redA = A;
445*095059a4SDave May 		ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
446*095059a4SDave May //	}
447*095059a4SDave May 
448*095059a4SDave May 	if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
449*095059a4SDave May 
450*095059a4SDave May 		ierr = MatGetRow( redA, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);
451*095059a4SDave May 
452*095059a4SDave May 		_n_new = (PetscMPIInt)nc;
453*095059a4SDave May 		_proc_neighbours_new = (PetscMPIInt*)malloc( sizeof(PetscMPIInt) * _n_new );
454*095059a4SDave May 
455*095059a4SDave May 		for (j=0; j<nc; j++) {
456*095059a4SDave May 			_proc_neighbours_new[j] = (PetscMPIInt)cols[j];
457*095059a4SDave May 		}
458*095059a4SDave May 		ierr = MatRestoreRow( redA, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);
459*095059a4SDave May 
460*095059a4SDave May 		*n_new               = (PetscMPIInt)_n_new;
461*095059a4SDave May 		*proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
462*095059a4SDave May 	}
463*095059a4SDave May 
464*095059a4SDave May 	ierr = MatDestroy(&redA);CHKERRQ(ierr);
465*095059a4SDave May 	ierr = MatDestroy(&A);CHKERRQ(ierr);
466*095059a4SDave May 	ierr = PetscFree(vals);CHKERRQ(ierr);
467*095059a4SDave May 	ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);
468*095059a4SDave May 
469*095059a4SDave May 	ierr = MPI_Barrier(comm);CHKERRQ(ierr);
470*095059a4SDave May 	PetscTime(&t1);
471*095059a4SDave May 	PetscPrintf(PETSC_COMM_WORLD,"************************** Ending _DataExCompleteCommunicationMap [setup time: %1.4e (sec)] ************************** \n",t1-t0);
472*095059a4SDave May 
473*095059a4SDave May 	PetscFunctionReturn(0);
474*095059a4SDave May }
475*095059a4SDave May 
476*095059a4SDave May #undef __FUNCT__
477*095059a4SDave May #define __FUNCT__ "DataExTopologyFinalize"
478*095059a4SDave May PetscErrorCode DataExTopologyFinalize(DataEx d)
479*095059a4SDave May {
480*095059a4SDave May 	PetscMPIInt    symm_nn;
481*095059a4SDave May 	PetscMPIInt   *symm_procs;
482*095059a4SDave May 	PetscMPIInt    r0,n,st,rt;
483*095059a4SDave May 	PetscMPIInt    nprocs;
484*095059a4SDave May 	PetscErrorCode ierr;
485*095059a4SDave May 
486*095059a4SDave May 
487*095059a4SDave May 	PetscFunctionBegin;
488*095059a4SDave May 	if (d->topology_status != DEOBJECT_INITIALIZED) {
489*095059a4SDave May 		SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first" );
490*095059a4SDave May 	}
491*095059a4SDave May 	ierr = PetscLogEventBegin(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
492*095059a4SDave May 
493*095059a4SDave May 	/* given infomation about all my neighbours, make map symmetric */
494*095059a4SDave May 	ierr = _DataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr);
495*095059a4SDave May 	/* update my arrays */
496*095059a4SDave May 	free(d->neighbour_procs);
497*095059a4SDave May 
498*095059a4SDave May 	d->n_neighbour_procs = symm_nn;
499*095059a4SDave May 	d->neighbour_procs   = symm_procs;
500*095059a4SDave May 
501*095059a4SDave May 
502*095059a4SDave May 	/* allocates memory */
503*095059a4SDave May 	if (d->messages_to_be_sent == NULL) {
504*095059a4SDave May 		d->messages_to_be_sent = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
505*095059a4SDave May 	}
506*095059a4SDave May 	if (d->message_offsets == NULL) {
507*095059a4SDave May 		d->message_offsets = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
508*095059a4SDave May 	}
509*095059a4SDave May 	if (d->messages_to_be_recvieved == NULL) {
510*095059a4SDave May 		d->messages_to_be_recvieved = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
511*095059a4SDave May 	}
512*095059a4SDave May 
513*095059a4SDave May 	if (d->pack_cnt == NULL) {
514*095059a4SDave May 		d->pack_cnt = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
515*095059a4SDave May 	}
516*095059a4SDave May 
517*095059a4SDave May 	if (d->_stats == NULL) {
518*095059a4SDave May 		d->_stats = (MPI_Status*)malloc( sizeof(MPI_Status) * 2*d->n_neighbour_procs );
519*095059a4SDave May 	}
520*095059a4SDave May 	if (d->_requests == NULL) {
521*095059a4SDave May 		d->_requests = (MPI_Request*)malloc( sizeof(MPI_Request) * 2*d->n_neighbour_procs );
522*095059a4SDave May 	}
523*095059a4SDave May 
524*095059a4SDave May 	if (d->send_tags == NULL) {
525*095059a4SDave May 		d->send_tags = (int*)malloc( sizeof(int) * d->n_neighbour_procs );
526*095059a4SDave May 	}
527*095059a4SDave May 	if (d->recv_tags == NULL) {
528*095059a4SDave May 		d->recv_tags = (int*)malloc( sizeof(int) * d->n_neighbour_procs );
529*095059a4SDave May 	}
530*095059a4SDave May 
531*095059a4SDave May 	/* compute message tags */
532*095059a4SDave May 	ierr = MPI_Comm_size(d->comm,&nprocs);CHKERRQ(ierr);
533*095059a4SDave May 	r0 = d->rank;
534*095059a4SDave May 	for (n=0; n<d->n_neighbour_procs; n++) {
535*095059a4SDave May 		PetscMPIInt r1 = d->neighbour_procs[n];
536*095059a4SDave May 
537*095059a4SDave May 		_get_tags( d->instance, nprocs, r0,r1, &st, &rt );
538*095059a4SDave May 
539*095059a4SDave May 		d->send_tags[n] = (int)st;
540*095059a4SDave May 		d->recv_tags[n] = (int)rt;
541*095059a4SDave May 	}
542*095059a4SDave May 
543*095059a4SDave May 	d->topology_status = DEOBJECT_FINALIZED;
544*095059a4SDave May 	ierr = PetscLogEventEnd(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
545*095059a4SDave May 
546*095059a4SDave May 	PetscFunctionReturn(0);
547*095059a4SDave May }
548*095059a4SDave May 
549*095059a4SDave May /* === Phase B === */
550*095059a4SDave May #undef __FUNCT__
551*095059a4SDave May #define __FUNCT__ "_DataExConvertProcIdToLocalIndex"
552*095059a4SDave May PetscErrorCode _DataExConvertProcIdToLocalIndex(DataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
553*095059a4SDave May {
554*095059a4SDave May 	PetscMPIInt i,np;
555*095059a4SDave May 
556*095059a4SDave May 
557*095059a4SDave May 	PetscFunctionBegin;
558*095059a4SDave May 	np = de->n_neighbour_procs;
559*095059a4SDave May 
560*095059a4SDave May 	*local = -1;
561*095059a4SDave May 	for (i=0; i<np; i++) {
562*095059a4SDave May 		if (proc_id == de->neighbour_procs[i]) {
563*095059a4SDave May 			*local = i;
564*095059a4SDave May 			break;
565*095059a4SDave May 		}
566*095059a4SDave May 	}
567*095059a4SDave May 	PetscFunctionReturn(0);
568*095059a4SDave May }
569*095059a4SDave May 
570*095059a4SDave May #undef __FUNCT__
571*095059a4SDave May #define __FUNCT__ "DataExInitializeSendCount"
572*095059a4SDave May PetscErrorCode DataExInitializeSendCount(DataEx de)
573*095059a4SDave May {
574*095059a4SDave May 	PetscMPIInt i;
575*095059a4SDave May 
576*095059a4SDave May 
577*095059a4SDave May 	PetscFunctionBegin;
578*095059a4SDave May 	if (de->topology_status != DEOBJECT_FINALIZED) {
579*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
580*095059a4SDave May 	}
581*095059a4SDave May 
582*095059a4SDave May 	de->message_lengths_status = DEOBJECT_INITIALIZED;
583*095059a4SDave May 
584*095059a4SDave May 	for (i=0; i<de->n_neighbour_procs; i++) {
585*095059a4SDave May 		de->messages_to_be_sent[i] = 0;
586*095059a4SDave May 	}
587*095059a4SDave May 
588*095059a4SDave May 	PetscFunctionReturn(0);
589*095059a4SDave May }
590*095059a4SDave May 
591*095059a4SDave May /*
592*095059a4SDave May 1) only allows counters to be set on neighbouring cpus
593*095059a4SDave May */
594*095059a4SDave May #undef __FUNCT__
595*095059a4SDave May #define __FUNCT__ "DataExAddToSendCount"
596*095059a4SDave May PetscErrorCode DataExAddToSendCount(DataEx de,const PetscMPIInt proc_id,const PetscInt count)
597*095059a4SDave May {
598*095059a4SDave May 	PetscMPIInt    local_val;
599*095059a4SDave May 	PetscErrorCode ierr;
600*095059a4SDave May 
601*095059a4SDave May 
602*095059a4SDave May 	PetscFunctionBegin;
603*095059a4SDave May 	if (de->message_lengths_status == DEOBJECT_FINALIZED) {
604*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DataExInitializeSendCount() first" );
605*095059a4SDave May 	}
606*095059a4SDave May 	else if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
607*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
608*095059a4SDave May 	}
609*095059a4SDave May 
610*095059a4SDave May 	ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr);
611*095059a4SDave May 	if (local_val == -1) {
612*095059a4SDave May 		SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id );
613*095059a4SDave May 	}
614*095059a4SDave May 
615*095059a4SDave May 	de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
616*095059a4SDave May 	PetscFunctionReturn(0);
617*095059a4SDave May }
618*095059a4SDave May 
619*095059a4SDave May #undef __FUNCT__
620*095059a4SDave May #define __FUNCT__ "DataExFinalizeSendCount"
621*095059a4SDave May PetscErrorCode DataExFinalizeSendCount(DataEx de)
622*095059a4SDave May {
623*095059a4SDave May 	PetscFunctionBegin;
624*095059a4SDave May 	if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
625*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
626*095059a4SDave May 	}
627*095059a4SDave May 	de->message_lengths_status = DEOBJECT_FINALIZED;
628*095059a4SDave May 
629*095059a4SDave May 	PetscFunctionReturn(0);
630*095059a4SDave May }
631*095059a4SDave May 
632*095059a4SDave May /* === Phase C === */
633*095059a4SDave May /*
634*095059a4SDave May  * zero out all send counts
635*095059a4SDave May  * free send and recv buffers
636*095059a4SDave May  * zeros out message length
637*095059a4SDave May  * zeros out all counters
638*095059a4SDave May  * zero out packed data counters
639*095059a4SDave May */
640*095059a4SDave May #undef __FUNCT__
641*095059a4SDave May #define __FUNCT__ "_DataExInitializeTmpStorage"
642*095059a4SDave May PetscErrorCode _DataExInitializeTmpStorage(DataEx de)
643*095059a4SDave May {
644*095059a4SDave May 	PetscMPIInt i,np;
645*095059a4SDave May 
646*095059a4SDave May 
647*095059a4SDave May 	PetscFunctionBegin;
648*095059a4SDave May 	if (de->n_neighbour_procs < 0) {
649*095059a4SDave May 		SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0");
650*095059a4SDave May 	}
651*095059a4SDave May 	if (de->neighbour_procs == NULL) {
652*095059a4SDave May 		SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" );
653*095059a4SDave May 	}
654*095059a4SDave May 
655*095059a4SDave May 	np = de->n_neighbour_procs;
656*095059a4SDave May 	for (i=0; i<np; i++) {
657*095059a4SDave May 	/*	de->messages_to_be_sent[i] = -1; */
658*095059a4SDave May 		de->messages_to_be_recvieved[i] = -1;
659*095059a4SDave May 	}
660*095059a4SDave May 
661*095059a4SDave May 	if (de->send_message != NULL) {
662*095059a4SDave May 		free(de->send_message);
663*095059a4SDave May 		de->send_message = NULL;
664*095059a4SDave May 	}
665*095059a4SDave May 	if (de->recv_message != NULL) {
666*095059a4SDave May 		free(de->recv_message);
667*095059a4SDave May 		de->recv_message = NULL;
668*095059a4SDave May 	}
669*095059a4SDave May 
670*095059a4SDave May 	PetscFunctionReturn(0);
671*095059a4SDave May }
672*095059a4SDave May 
673*095059a4SDave May /*
674*095059a4SDave May *) Zeros out pack data counters
675*095059a4SDave May *) Ensures mesaage length is set
676*095059a4SDave May *) Checks send counts properly initialized
677*095059a4SDave May *) allocates space for pack data
678*095059a4SDave May */
679*095059a4SDave May #undef __FUNCT__
680*095059a4SDave May #define __FUNCT__ "DataExPackInitialize"
681*095059a4SDave May PetscErrorCode DataExPackInitialize(DataEx de,size_t unit_message_size)
682*095059a4SDave May {
683*095059a4SDave May 	PetscMPIInt    i,np;
684*095059a4SDave May 	PetscInt       total;
685*095059a4SDave May 	PetscErrorCode ierr;
686*095059a4SDave May 
687*095059a4SDave May 
688*095059a4SDave May 	PetscFunctionBegin;
689*095059a4SDave May 	if (de->topology_status != DEOBJECT_FINALIZED) {
690*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
691*095059a4SDave May 	}
692*095059a4SDave May 	if (de->message_lengths_status != DEOBJECT_FINALIZED) {
693*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
694*095059a4SDave May 	}
695*095059a4SDave May 
696*095059a4SDave May 	de->packer_status = DEOBJECT_INITIALIZED;
697*095059a4SDave May 
698*095059a4SDave May 	ierr = _DataExInitializeTmpStorage(de);CHKERRQ(ierr);
699*095059a4SDave May 
700*095059a4SDave May 	np = de->n_neighbour_procs;
701*095059a4SDave May 
702*095059a4SDave May 	de->unit_message_size = unit_message_size;
703*095059a4SDave May 
704*095059a4SDave May 	total = 0;
705*095059a4SDave May 	for (i=0; i<np; i++) {
706*095059a4SDave May 		if (de->messages_to_be_sent[i] == -1) {
707*095059a4SDave May 			PetscMPIInt proc_neighour = de->neighbour_procs[i];
708*095059a4SDave May 			SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DataExSetSendCount() first", (int)proc_neighour );
709*095059a4SDave May 		}
710*095059a4SDave May 		total = total + de->messages_to_be_sent[i];
711*095059a4SDave May 	}
712*095059a4SDave May 
713*095059a4SDave May 	/* create space for the data to be sent */
714*095059a4SDave May 	de->send_message = (void*)malloc( unit_message_size * (total + 1) );
715*095059a4SDave May 	/* initialize memory */
716*095059a4SDave May 	memset( de->send_message, 0, unit_message_size * (total + 1) );
717*095059a4SDave May 	/* set total items to send */
718*095059a4SDave May 	de->send_message_length = total;
719*095059a4SDave May 
720*095059a4SDave May 	de->message_offsets[0] = 0;
721*095059a4SDave May 	total = de->messages_to_be_sent[0];
722*095059a4SDave May 	for (i=1; i<np; i++) {
723*095059a4SDave May 		de->message_offsets[i] = total;
724*095059a4SDave May 		total = total + de->messages_to_be_sent[i];
725*095059a4SDave May 	}
726*095059a4SDave May 
727*095059a4SDave May 	/* init the packer counters */
728*095059a4SDave May 	de->total_pack_cnt = 0;
729*095059a4SDave May 	for (i=0; i<np; i++) {
730*095059a4SDave May 		de->pack_cnt[i] = 0;
731*095059a4SDave May 	}
732*095059a4SDave May 
733*095059a4SDave May 	PetscFunctionReturn(0);
734*095059a4SDave May }
735*095059a4SDave May 
736*095059a4SDave May /*
737*095059a4SDave May *) Ensures data gets been packed appropriately and no overlaps occur
738*095059a4SDave May */
739*095059a4SDave May #undef __FUNCT__
740*095059a4SDave May #define __FUNCT__ "DataExPackData"
741*095059a4SDave May PetscErrorCode DataExPackData(DataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
742*095059a4SDave May {
743*095059a4SDave May 	PetscMPIInt    local;
744*095059a4SDave May 	PetscInt       insert_location;
745*095059a4SDave May 	void           *dest;
746*095059a4SDave May 	PetscErrorCode ierr;
747*095059a4SDave May 
748*095059a4SDave May 
749*095059a4SDave May 	PetscFunctionBegin;
750*095059a4SDave May 	if (de->packer_status == DEOBJECT_FINALIZED) {
751*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
752*095059a4SDave May 	}
753*095059a4SDave May 	else if (de->packer_status != DEOBJECT_INITIALIZED) {
754*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
755*095059a4SDave May 	}
756*095059a4SDave May 
757*095059a4SDave May 
758*095059a4SDave May 	if (de->send_message == NULL){
759*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DataExPackInitialize() first" );
760*095059a4SDave May 	}
761*095059a4SDave May 
762*095059a4SDave May 
763*095059a4SDave May 	ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr);
764*095059a4SDave May 	if (local == -1) {
765*095059a4SDave May 		SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id );
766*095059a4SDave May 	}
767*095059a4SDave May 
768*095059a4SDave May 	if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) {
769*095059a4SDave May 		SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
770*095059a4SDave May 				(int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] );
771*095059a4SDave May 
772*095059a4SDave May 		/* don't need this - the catch for too many messages will pick this up. Gives us more info though */
773*095059a4SDave May 		if (de->packer_status == DEOBJECT_FINALIZED) {
774*095059a4SDave May 			SETERRQ( de->comm, PETSC_ERR_ARG_WRONG, "Cannot insert any more data. DataExPackFinalize() has been called." );
775*095059a4SDave May 		}
776*095059a4SDave May 	}
777*095059a4SDave May 
778*095059a4SDave May 	/* copy memory */
779*095059a4SDave May 	insert_location = de->message_offsets[local] + de->pack_cnt[local];
780*095059a4SDave May 	dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
781*095059a4SDave May 	memcpy( dest, data, de->unit_message_size * n );
782*095059a4SDave May 
783*095059a4SDave May 	/* increment counter */
784*095059a4SDave May 	de->pack_cnt[local] = de->pack_cnt[local] + n;
785*095059a4SDave May 
786*095059a4SDave May 	PetscFunctionReturn(0);
787*095059a4SDave May }
788*095059a4SDave May 
789*095059a4SDave May /*
790*095059a4SDave May *) Ensures all data has been packed
791*095059a4SDave May */
792*095059a4SDave May #undef __FUNCT__
793*095059a4SDave May #define __FUNCT__ "DataExPackFinalize"
794*095059a4SDave May PetscErrorCode DataExPackFinalize(DataEx de)
795*095059a4SDave May {
796*095059a4SDave May 	PetscMPIInt i,np;
797*095059a4SDave May 	PetscInt    total;
798*095059a4SDave May 	PetscErrorCode ierr;
799*095059a4SDave May 
800*095059a4SDave May 
801*095059a4SDave May 	PetscFunctionBegin;
802*095059a4SDave May 	if (de->packer_status != DEOBJECT_INITIALIZED) {
803*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DataExPackInitialize() first." );
804*095059a4SDave May 	}
805*095059a4SDave May 
806*095059a4SDave May 	np = de->n_neighbour_procs;
807*095059a4SDave May 
808*095059a4SDave May 	for (i=0; i<np; i++) {
809*095059a4SDave May 		if (de->pack_cnt[i] != de->messages_to_be_sent[i]) {
810*095059a4SDave May 			SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
811*095059a4SDave May 					(int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] );
812*095059a4SDave May 		}
813*095059a4SDave May 	}
814*095059a4SDave May 
815*095059a4SDave May 	/* init */
816*095059a4SDave May 	for (i=0; i<np; i++) {
817*095059a4SDave May 		de->messages_to_be_recvieved[i] = -1;
818*095059a4SDave May 	}
819*095059a4SDave May 
820*095059a4SDave May 	/* figure out the recv counts here */
821*095059a4SDave May 	for (i=0; i<np; i++) {
822*095059a4SDave May 	//	MPI_Send( &de->messages_to_be_sent[i], 1, MPI_INT, de->neighbour_procs[i], de->send_tags[i], de->comm );
823*095059a4SDave May 		ierr = MPI_Isend( &de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
824*095059a4SDave May 	//	MPI_Send( &de->messages_to_be_sent[i], 1, MPI_INT, de->neighbour_procs[i], 0, de->comm );
825*095059a4SDave May 	}
826*095059a4SDave May 	for (i=0; i<np; i++) {
827*095059a4SDave May 	//	MPI_Recv( &de->messages_to_be_recvieved[i], 1, MPI_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &stat );
828*095059a4SDave May 		ierr = MPI_Irecv( &de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
829*095059a4SDave May 	//	MPI_Recv( &de->messages_to_be_recvieved[i], 1, MPI_INT, de->neighbour_procs[i], 0, de->comm, &stat );
830*095059a4SDave May 	}
831*095059a4SDave May 	ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);
832*095059a4SDave May 
833*095059a4SDave May 	/* create space for the data to be recvieved */
834*095059a4SDave May 	total = 0;
835*095059a4SDave May 	for (i=0; i<np; i++) {
836*095059a4SDave May 		total = total + de->messages_to_be_recvieved[i];
837*095059a4SDave May 	}
838*095059a4SDave May 	de->recv_message = (void*)malloc( de->unit_message_size * (total + 1) );
839*095059a4SDave May 	/* initialize memory */
840*095059a4SDave May 	memset( de->recv_message, 0, de->unit_message_size * (total + 1) );
841*095059a4SDave May 	/* set total items to recieve */
842*095059a4SDave May 	de->recv_message_length = total;
843*095059a4SDave May 
844*095059a4SDave May 	de->packer_status = DEOBJECT_FINALIZED;
845*095059a4SDave May 
846*095059a4SDave May 	de->communication_status = DEOBJECT_INITIALIZED;
847*095059a4SDave May 
848*095059a4SDave May 	PetscFunctionReturn(0);
849*095059a4SDave May }
850*095059a4SDave May 
851*095059a4SDave May /* do the actual message passing now */
852*095059a4SDave May #undef __FUNCT__
853*095059a4SDave May #define __FUNCT__ "DataExBegin"
854*095059a4SDave May PetscErrorCode DataExBegin(DataEx de)
855*095059a4SDave May {
856*095059a4SDave May 	PetscMPIInt i,np;
857*095059a4SDave May 	void       *dest;
858*095059a4SDave May 	PetscInt    length;
859*095059a4SDave May 	PetscErrorCode ierr;
860*095059a4SDave May 
861*095059a4SDave May 
862*095059a4SDave May 	PetscFunctionBegin;
863*095059a4SDave May 	if (de->topology_status != DEOBJECT_FINALIZED) {
864*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
865*095059a4SDave May 	}
866*095059a4SDave May 	if (de->message_lengths_status != DEOBJECT_FINALIZED) {
867*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
868*095059a4SDave May 	}
869*095059a4SDave May 	if (de->packer_status != DEOBJECT_FINALIZED) {
870*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" );
871*095059a4SDave May 	}
872*095059a4SDave May 
873*095059a4SDave May 	if (de->communication_status == DEOBJECT_FINALIZED) {
874*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DataExInitialize() first." );
875*095059a4SDave May 	}
876*095059a4SDave May 
877*095059a4SDave May 	if (de->recv_message == NULL) {
878*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
879*095059a4SDave May 	}
880*095059a4SDave May 
881*095059a4SDave May 	ierr = PetscLogEventBegin(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
882*095059a4SDave May 	np = de->n_neighbour_procs;
883*095059a4SDave May 
884*095059a4SDave May 	/* == NON BLOCKING == */
885*095059a4SDave May 	for (i=0; i<np; i++) {
886*095059a4SDave May 		length = de->messages_to_be_sent[i] * de->unit_message_size;
887*095059a4SDave May 		dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
888*095059a4SDave May 		ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
889*095059a4SDave May 	}
890*095059a4SDave May 
891*095059a4SDave May 	ierr = PetscLogEventEnd(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
892*095059a4SDave May 	PetscFunctionReturn(0);
893*095059a4SDave May }
894*095059a4SDave May 
895*095059a4SDave May /* do the actual message passing now */
896*095059a4SDave May #undef __FUNCT__
897*095059a4SDave May #define __FUNCT__ "DataExEnd"
898*095059a4SDave May PetscErrorCode DataExEnd(DataEx de)
899*095059a4SDave May {
900*095059a4SDave May 	PetscMPIInt  i,np;
901*095059a4SDave May 	PetscInt     total;
902*095059a4SDave May 	PetscInt    *message_recv_offsets;
903*095059a4SDave May 	void        *dest;
904*095059a4SDave May 	PetscInt     length;
905*095059a4SDave May 	PetscErrorCode ierr;
906*095059a4SDave May 
907*095059a4SDave May 
908*095059a4SDave May 	PetscFunctionBegin;
909*095059a4SDave May 	if (de->communication_status != DEOBJECT_INITIALIZED) {
910*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DataExInitialize() first." );
911*095059a4SDave May 	}
912*095059a4SDave May 	if (de->recv_message == NULL) {
913*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
914*095059a4SDave May 	}
915*095059a4SDave May 
916*095059a4SDave May 	ierr = PetscLogEventBegin(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
917*095059a4SDave May 	np = de->n_neighbour_procs;
918*095059a4SDave May 
919*095059a4SDave May 	message_recv_offsets = (PetscInt*)malloc( sizeof(PetscInt) * np );
920*095059a4SDave May 	message_recv_offsets[0] = 0;
921*095059a4SDave May 	total = de->messages_to_be_recvieved[0];
922*095059a4SDave May 	for (i=1; i<np; i++) {
923*095059a4SDave May 		message_recv_offsets[i] = total;
924*095059a4SDave May 		total = total + de->messages_to_be_recvieved[i];
925*095059a4SDave May 	}
926*095059a4SDave May 
927*095059a4SDave May 	/* == NON BLOCKING == */
928*095059a4SDave May 	for (i=0; i<np; i++) {
929*095059a4SDave May 		length = de->messages_to_be_recvieved[i] * de->unit_message_size;
930*095059a4SDave May 		dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
931*095059a4SDave May 		ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
932*095059a4SDave May 	}
933*095059a4SDave May 	ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);
934*095059a4SDave May 
935*095059a4SDave May 	free(message_recv_offsets);
936*095059a4SDave May 
937*095059a4SDave May 	de->communication_status = DEOBJECT_FINALIZED;
938*095059a4SDave May 	ierr = PetscLogEventEnd(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
939*095059a4SDave May 	PetscFunctionReturn(0);
940*095059a4SDave May }
941*095059a4SDave May 
942*095059a4SDave May #undef __FUNCT__
943*095059a4SDave May #define __FUNCT__ "DataExGetSendData"
944*095059a4SDave May PetscErrorCode DataExGetSendData(DataEx de,PetscInt *length,void **send)
945*095059a4SDave May {
946*095059a4SDave May 	PetscFunctionBegin;
947*095059a4SDave May 	if (de->packer_status != DEOBJECT_FINALIZED) {
948*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." );
949*095059a4SDave May 	}
950*095059a4SDave May 	*length = de->send_message_length;
951*095059a4SDave May 	*send   = de->send_message;
952*095059a4SDave May 	PetscFunctionReturn(0);
953*095059a4SDave May }
954*095059a4SDave May 
955*095059a4SDave May #undef __FUNCT__
956*095059a4SDave May #define __FUNCT__ "DataExGetRecvData"
957*095059a4SDave May PetscErrorCode DataExGetRecvData(DataEx de,PetscInt *length,void **recv)
958*095059a4SDave May {
959*095059a4SDave May 	PetscFunctionBegin;
960*095059a4SDave May 	if (de->communication_status != DEOBJECT_FINALIZED) {
961*095059a4SDave May 		SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." );
962*095059a4SDave May 	}
963*095059a4SDave May 	*length = de->recv_message_length;
964*095059a4SDave May 	*recv   = de->recv_message;
965*095059a4SDave May 	PetscFunctionReturn(0);
966*095059a4SDave May }
967*095059a4SDave May 
968*095059a4SDave May #undef __FUNCT__
969*095059a4SDave May #define __FUNCT__ "DataExTopologyGetNeighbours"
970*095059a4SDave May PetscErrorCode DataExTopologyGetNeighbours(DataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
971*095059a4SDave May {
972*095059a4SDave May 	PetscFunctionBegin;
973*095059a4SDave May 	if (n)     { *n     = de->n_neighbour_procs; }
974*095059a4SDave May 	if (neigh) { *neigh = de->neighbour_procs; }
975*095059a4SDave May 	PetscFunctionReturn(0);
976*095059a4SDave May }
977*095059a4SDave May 
978