xref: /petsc/src/dm/impls/swarm/data_ex.c (revision b9a482c4dc35e76cdf97cd67c52ad91ca5272534)
1 /*
2 Build a few basic tools to help with partitioned domains.
3 
4 1)
5 On each processor, have a DomainExchangerTopology.
6 This is a doubly-connected edge list which enumerates the
7 communication paths between connected processors. By numbering
8 these paths we can always uniquely assign message identifiers.
9 
10         edge
11          10
12 proc  --------->  proc
13  0    <--------    1
14          11
15         twin
16 
17 Eg: Proc 0 sends to proc 1 with message id 10. To receive the correct
18 message, proc 1 looks for the edge connected to proc 0, and then the
19 message id comes from the twin of that edge.
20 
21 2)
22 A DomainExchangerArrayPacker.
23 A little function which given a piece of data, will memcpy the data into
24 an array (which will be sent to procs) into the correct place.
25 
26 On Proc 1 we send data to procs 0,2,3. The data is of different lengths.
27 All data gets jammed into single array. Need to "jam" data into correct locations
28 The Packer knows how much is going to each processor and keeps track of the inserts,
29 so as to avoid ever packing TOO much into one slot, and inevitably corrupting some memory.
30 
31 data to 0    data to 2       data to 3
32 
33 |--------|-----------------|--|
34 
35 
36 User has to unpack the message themselves. I can get you the pointer for each
37 entry, but you'll have to cast it to the appropriate data type.
38 
39 
40 
41 
42 Phase A: Build topology
43 
44 Phase B: Define message lengths
45 
46 Phase C: Pack data
47 
48 Phase D: Send data
49 
50 //
51 DataExCreate()
52 // A
53 DataExTopologyInitialize()
54 DataExTopologyAddNeighbour()
55 DataExTopologyAddNeighbour()
56 DataExTopologyFinalize()
57 // B
58 DataExZeroAllSendCount()
59 DataExAddToSendCount()
60 DataExAddToSendCount()
61 DataExAddToSendCount()
62 // C
63 DataExPackInitialize()
64 DataExPackData()
65 DataExPackData()
66 DataExPackFinalize()
67 // D
68 DataExBegin()
69 // ... perform any calculations ... ///
70 DataExEnd()
71 
72 // Call any getters //
73 
74 
75 */
76 
77 #include <petsc.h>
78 #include <petscvec.h>
79 #include <petscmat.h>
80 
81 #include "data_ex.h"
82 
83 const char *status_names[] = {"initialized", "finalized", "unknown"};
84 
85 PetscLogEvent PTATIN_DataExchangerTopologySetup;
86 PetscLogEvent PTATIN_DataExchangerBegin;
87 PetscLogEvent PTATIN_DataExchangerEnd;
88 
89 #undef __FUNCT__
90 #define __FUNCT__ "DataExCreate"
91 PetscErrorCode DataExCreate(MPI_Comm comm,const PetscInt count, DataEx *ex)
92 {
93   PetscErrorCode ierr;
94   DataEx         d;
95 
96   PetscFunctionBegin;
97   ierr = PetscMalloc(sizeof(struct _p_DataEx), &d);CHKERRQ(ierr);
98   ierr = PetscMemzero(d, sizeof(struct _p_DataEx));CHKERRQ(ierr);
99   ierr = MPI_Comm_dup(comm,&d->comm);CHKERRQ(ierr);
100   ierr = MPI_Comm_rank(d->comm,&d->rank);CHKERRQ(ierr);
101 
102   d->instance = count;
103 
104   d->topology_status        = DEOBJECT_STATE_UNKNOWN;
105   d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
106   d->packer_status          = DEOBJECT_STATE_UNKNOWN;
107   d->communication_status   = DEOBJECT_STATE_UNKNOWN;
108 
109   d->n_neighbour_procs = -1;
110   d->neighbour_procs   = NULL;
111 
112   d->messages_to_be_sent      = NULL;
113   d->message_offsets          = NULL;
114   d->messages_to_be_recvieved = NULL;
115 
116   d->unit_message_size   = -1;
117   d->send_message        = NULL;
118   d->send_message_length = -1;
119   d->recv_message        = NULL;
120   d->recv_message_length = -1;
121   d->total_pack_cnt      = -1;
122   d->pack_cnt            = NULL;
123 
124   d->send_tags = NULL;
125   d->recv_tags = NULL;
126 
127   d->_stats    = NULL;
128   d->_requests = NULL;
129   *ex = d;
130   PetscFunctionReturn(0);
131 }
132 
#undef __FUNCT__
#define __FUNCT__ "DataExView"
/*
 Print a summary of a DataEx object: the state of each of the four phases,
 and - once topology / message lengths are finalized - the neighbour ranks
 plus per-neighbour message counts and tags. Collective lines go to
 PETSC_COMM_WORLD; per-rank detail lines go to PETSC_COMM_SELF.
 NOTE(review): status_names[] is indexed directly by the status enum values,
 which assumes initialized=0, finalized=1, unknown=2 - confirm in data_ex.h.
*/
PetscErrorCode DataExView(DataEx d)
{
  PetscMPIInt p;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscPrintf( PETSC_COMM_WORLD, "DataEx: instance=%D\n",d->instance);CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]);CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status] );CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  packer status status:   %s \n", status_names[d->packer_status] );CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status] );CHKERRQ(ierr);

  if (d->topology_status == DEOBJECT_FINALIZED) {
    /* neighbour list is only meaningful once phase A is complete */
    ierr = PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");CHKERRQ(ierr);
    ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] neighbours: %d \n", (int)d->rank, (int)d->n_neighbour_procs );CHKERRQ(ierr);
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d]   neighbour[%D] = %d \n", (int)d->rank, p, (int)d->neighbour_procs[p]);CHKERRQ(ierr);
    }
  }
  if (d->message_lengths_status == DEOBJECT_FINALIZED) {
    /* counts are in units of unit_message_size bytes, not bytes */
    ierr = PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");CHKERRQ(ierr);
    ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] atomic size: %ld \n", (int)d->rank, (long int)d->unit_message_size );CHKERRQ(ierr);
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", (int)d->rank, d->messages_to_be_sent[p], d->send_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
    }
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", (int)d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
    }
  }
  /* nothing extra is reported for the pack/communication phases yet */
  if (d->packer_status == DEOBJECT_FINALIZED) {}
  if (d->communication_status == DEOBJECT_FINALIZED) {}
  PetscFunctionReturn(0);
}
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "DataExDestroy"
171 PetscErrorCode DataExDestroy(DataEx d)
172 {
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr);
177   if (d->neighbour_procs) {ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);}
178   if (d->messages_to_be_sent) {ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);}
179   if (d->message_offsets) {ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);}
180   if (d->messages_to_be_recvieved) {ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);}
181   if (d->send_message) {ierr = PetscFree(d->send_message);CHKERRQ(ierr);}
182   if (d->recv_message) {ierr = PetscFree(d->recv_message);CHKERRQ(ierr);}
183   if (d->pack_cnt) {ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);}
184   if (d->send_tags) {ierr = PetscFree(d->send_tags);CHKERRQ(ierr);}
185   if (d->recv_tags) {ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);}
186   if (d->_stats) {ierr = PetscFree(d->_stats);CHKERRQ(ierr);}
187   if (d->_requests) {ierr = PetscFree(d->_requests);CHKERRQ(ierr);}
188   ierr = PetscFree(d);CHKERRQ(ierr);
189   PetscFunctionReturn(0);
190 }
191 
192 /* === Phase A === */
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "DataExTopologyInitialize"
196 PetscErrorCode DataExTopologyInitialize(DataEx d)
197 {
198   PetscErrorCode ierr;
199 
200   PetscFunctionBegin;
201   d->topology_status = DEOBJECT_INITIALIZED;
202   d->n_neighbour_procs = 0;
203   ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
204   ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);
205   ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);
206   ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);
207   ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);
208   ierr = PetscFree(d->send_tags);CHKERRQ(ierr);
209   ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "DataExTopologyAddNeighbour"
215 PetscErrorCode DataExTopologyAddNeighbour(DataEx d,const PetscMPIInt proc_id)
216 {
217   PetscMPIInt    n,found;
218   PetscMPIInt    nproc;
219   PetscErrorCode ierr;
220 
221   PetscFunctionBegin;
222   if (d->topology_status == DEOBJECT_FINALIZED) {
223     SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DataExTopologyInitialize() first");
224   } else if (d->topology_status != DEOBJECT_INITIALIZED) {
225     SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");
226   }
227   /* error on negative entries */
228   if (proc_id < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
229   /* error on ranks larger than number of procs in communicator */
230   ierr = MPI_Comm_size(d->comm,&nproc);CHKERRQ(ierr);
231   if (proc_id >= nproc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank >= nproc");
232   if (d->n_neighbour_procs == 0) {ierr = PetscMalloc1(1, &d->neighbour_procs);CHKERRQ(ierr);}
233   /* check for proc_id */
234   found = 0;
235   for (n = 0; n < d->n_neighbour_procs; n++) {
236     if (d->neighbour_procs[n] == proc_id) {
237       found  = 1;
238     }
239   }
240   if (found == 0) { /* add it to list */
241     ierr = PetscRealloc(sizeof(PetscMPIInt)*(d->n_neighbour_procs+1), &d->neighbour_procs);CHKERRQ(ierr);
242     d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
243     d->n_neighbour_procs++;
244   }
245   PetscFunctionReturn(0);
246 }
247 
248 /*
249 counter: the index of the communication object
250 N: the number of processors
251 r0: rank of sender
252 r1: rank of receiver
253 
254 procs = { 0, 1, 2, 3 }
255 
256 0 ==> 0		e=0
257 0 ==> 1		e=1
258 0 ==> 2		e=2
259 0 ==> 3		e=3
260 
261 1 ==> 0		e=4
262 1 ==> 1		e=5
263 1 ==> 2		e=6
264 1 ==> 3		e=7
265 
266 2 ==> 0		e=8
267 2 ==> 1		e=9
268 2 ==> 2		e=10
269 2 ==> 3		e=11
270 
271 3 ==> 0		e=12
272 3 ==> 1		e=13
273 3 ==> 2		e=14
274 3 ==> 3		e=15
275 
276 If we require that proc A sends to proc B, then the SEND tag index will be given by
277   N * rank(A) + rank(B) + offset
278 If we require that proc A will receive from proc B, then the RECV tag index will be given by
279   N * rank(B) + rank(A) + offset
280 
281 */
282 static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt)
283 {
284   PetscMPIInt st,rt;
285 
286   st = N*r0 + r1   +   N*N*counter;
287   rt = N*r1 + r0   +   N*N*counter;
288   *_st = st;
289   *_rt = rt;
290 }
291 
/*
Makes the communication map symmetric: on exit, if rank A lists B as a
neighbour then B also lists A. Implemented by assembling a size x size
parallel AIJ matrix whose (r,c) entries mark edges, then reading back this
rank's row to recover the symmetrized neighbour list.
Collective on comm. The caller owns *proc_neighbours_new (PetscFree).
*/
#undef __FUNCT__
#define __FUNCT__ "_DataExCompleteCommunicationMap"
PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
{
  Mat               A;
  PetscInt          i,j,nc;
  PetscInt          n_, *proc_neighbours_;
  PetscInt          rank_i_;
  PetscMPIInt       size,  rank_i;
  PetscScalar       *vals;
  const PetscInt    *cols;
  const PetscScalar *red_vals;
  PetscMPIInt       _n_new, *_proc_neighbours_new;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  /* widen the neighbour ranks to PetscInt so they can be used as Mat indices */
  n_ = n;
  ierr = PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    proc_neighbours_[i] = proc_neighbours[i];
  }
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr);
  rank_i_ = rank_i;

  /* one row/column per rank; entry (r,c) != 0 means "r talks to c" */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL);CHKERRQ(ierr);
  /* new nonzeros may appear when the transpose entries are inserted below */
  ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);CHKERRQ(ierr);
  /* Build original map */
  ierr = PetscMalloc1(n_, &vals);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    vals[i] = 1.0;
  }
  ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  /* Now force all other connections if they are not already there */
  /* It's more efficient to do them all at once */
  /* inserting column rank_i_ into each neighbour's row symmetrizes the map */
  for (i = 0; i < n_; ++i) {
    vals[i] = 2.0;
  }
  ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
/*
  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
  if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
    /* this rank's row now holds the symmetrized neighbour set in its columns */
    ierr = MatGetRow(A, rank_i_, &nc, &cols, &red_vals);CHKERRQ(ierr);
    _n_new = (PetscMPIInt) nc;
    ierr = PetscMalloc1(_n_new, &_proc_neighbours_new);CHKERRQ(ierr);
    for (j = 0; j < nc; ++j) {
      _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
    }
    ierr = MatRestoreRow( A, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);
    *n_new               = (PetscMPIInt)_n_new;
    *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
  }
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);
  ierr = MPI_Barrier(comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
364 
365 #undef __FUNCT__
366 #define __FUNCT__ "DataExTopologyFinalize"
367 PetscErrorCode DataExTopologyFinalize(DataEx d)
368 {
369   PetscMPIInt    symm_nn;
370   PetscMPIInt   *symm_procs;
371   PetscMPIInt    r0,n,st,rt;
372   PetscMPIInt    nprocs;
373   PetscErrorCode ierr;
374 
375   PetscFunctionBegin;
376   if (d->topology_status != DEOBJECT_INITIALIZED) {
377     SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first");
378   }
379   ierr = PetscLogEventBegin(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
380   /* given infomation about all my neighbours, make map symmetric */
381   ierr = _DataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr);
382   /* update my arrays */
383   ierr = PetscFree(d->neighbour_procs);
384   d->n_neighbour_procs = symm_nn;
385   d->neighbour_procs   = symm_procs;
386   /* allocates memory */
387   if (!d->messages_to_be_sent) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_sent);CHKERRQ(ierr);}
388   if (!d->message_offsets) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->message_offsets);CHKERRQ(ierr);}
389   if (!d->messages_to_be_recvieved) {ierr = PetscMalloc1(d->n_neighbour_procs+1, &d->messages_to_be_recvieved);CHKERRQ(ierr);}
390   if (!d->pack_cnt) {ierr = PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt);CHKERRQ(ierr);}
391   if (!d->_stats) {ierr = PetscMalloc(sizeof(MPI_Status) * 2*d->n_neighbour_procs, &d->_stats);CHKERRQ(ierr);}
392   if (!d->_requests) {ierr = PetscMalloc(sizeof(MPI_Request) * 2*d->n_neighbour_procs, &d->_requests);CHKERRQ(ierr);}
393   if (!d->send_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags);CHKERRQ(ierr);}
394   if (!d->recv_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags);CHKERRQ(ierr);}
395   /* compute message tags */
396   ierr = MPI_Comm_size(d->comm,&nprocs);CHKERRQ(ierr);
397   r0 = d->rank;
398   for (n = 0; n < d->n_neighbour_procs; ++n) {
399     PetscMPIInt r1 = d->neighbour_procs[n];
400 
401     _get_tags( d->instance, nprocs, r0,r1, &st, &rt );
402     d->send_tags[n] = (int)st;
403     d->recv_tags[n] = (int)rt;
404   }
405   d->topology_status = DEOBJECT_FINALIZED;
406   ierr = PetscLogEventEnd(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 /* === Phase B === */
411 #undef __FUNCT__
412 #define __FUNCT__ "_DataExConvertProcIdToLocalIndex"
413 PetscErrorCode _DataExConvertProcIdToLocalIndex(DataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
414 {
415   PetscMPIInt i,np;
416 
417   PetscFunctionBegin;
418   np = de->n_neighbour_procs;
419   *local = -1;
420   for (i = 0; i < np; ++i) {
421     if (proc_id == de->neighbour_procs[i]) {
422       *local = i;
423       break;
424     }
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "DataExInitializeSendCount"
431 PetscErrorCode DataExInitializeSendCount(DataEx de)
432 {
433   PetscMPIInt i;
434 
435   PetscFunctionBegin;
436   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ(de->comm, PETSC_ERR_ORDER, "Topology not finalized");
437   de->message_lengths_status = DEOBJECT_INITIALIZED;
438   for (i = 0; i < de->n_neighbour_procs; ++i) {
439     de->messages_to_be_sent[i] = 0;
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 /*
445 1) only allows counters to be set on neighbouring cpus
446 */
447 #undef __FUNCT__
448 #define __FUNCT__ "DataExAddToSendCount"
449 PetscErrorCode DataExAddToSendCount(DataEx de,const PetscMPIInt proc_id,const PetscInt count)
450 {
451   PetscMPIInt    local_val;
452   PetscErrorCode ierr;
453 
454   PetscFunctionBegin;
455   if (de->message_lengths_status == DEOBJECT_FINALIZED) {
456     SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DataExInitializeSendCount() first" );
457   } else if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
458     SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
459   }
460   ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr);
461   if (local_val == -1) {
462     SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id );
463   }
464   de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "DataExFinalizeSendCount"
470 PetscErrorCode DataExFinalizeSendCount(DataEx de)
471 {
472   PetscFunctionBegin;
473   if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
474     SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
475   }
476   de->message_lengths_status = DEOBJECT_FINALIZED;
477   PetscFunctionReturn(0);
478 }
479 
480 /* === Phase C === */
481 /*
482  * zero out all send counts
483  * free send and recv buffers
484  * zeros out message length
485  * zeros out all counters
486  * zero out packed data counters
487 */
488 #undef __FUNCT__
489 #define __FUNCT__ "_DataExInitializeTmpStorage"
490 PetscErrorCode _DataExInitializeTmpStorage(DataEx de)
491 {
492   PetscMPIInt    i, np;
493   PetscErrorCode ierr;
494 
495   PetscFunctionBegin;
496   /*if (de->n_neighbour_procs < 0) {
497     SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0");
498   }
499   */
500   /*
501   if (!de->neighbour_procs) {
502     SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" );
503   }
504   */
505   np = de->n_neighbour_procs;
506   for (i = 0; i < np; ++i) {
507     /*	de->messages_to_be_sent[i] = -1; */
508     de->messages_to_be_recvieved[i] = -1;
509   }
510   ierr = PetscFree(de->send_message);CHKERRQ(ierr);
511   ierr = PetscFree(de->recv_message);CHKERRQ(ierr);
512   PetscFunctionReturn(0);
513 }
514 
515 /*
516 *) Zeros out pack data counters
517 *) Ensures mesaage length is set
518 *) Checks send counts properly initialized
519 *) allocates space for pack data
520 */
521 #undef __FUNCT__
522 #define __FUNCT__ "DataExPackInitialize"
523 PetscErrorCode DataExPackInitialize(DataEx de,size_t unit_message_size)
524 {
525   PetscMPIInt    i,np;
526   PetscInt       total;
527   PetscErrorCode ierr;
528 
529   PetscFunctionBegin;
530   if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
531   if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
532   de->packer_status = DEOBJECT_INITIALIZED;
533   ierr = _DataExInitializeTmpStorage(de);CHKERRQ(ierr);
534   np = de->n_neighbour_procs;
535   de->unit_message_size = unit_message_size;
536   total = 0;
537   for (i = 0; i < np; ++i) {
538     if (de->messages_to_be_sent[i] == -1) {
539       PetscMPIInt proc_neighour = de->neighbour_procs[i];
540       SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DataExSetSendCount() first", (int)proc_neighour );
541     }
542     total = total + de->messages_to_be_sent[i];
543   }
544   /* create space for the data to be sent */
545   ierr = PetscMalloc(unit_message_size * (total + 1), &de->send_message);CHKERRQ(ierr);
546   /* initialize memory */
547   ierr = PetscMemzero(de->send_message, unit_message_size * (total + 1));CHKERRQ(ierr);
548   /* set total items to send */
549   de->send_message_length = total;
550   de->message_offsets[0] = 0;
551   total = de->messages_to_be_sent[0];
552   for (i = 1; i < np; ++i) {
553     de->message_offsets[i] = total;
554     total = total + de->messages_to_be_sent[i];
555   }
556   /* init the packer counters */
557   de->total_pack_cnt = 0;
558   for (i = 0; i < np; ++i) {
559     de->pack_cnt[i] = 0;
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 /*
565 *) Ensures data gets been packed appropriately and no overlaps occur
566 */
567 #undef __FUNCT__
568 #define __FUNCT__ "DataExPackData"
569 PetscErrorCode DataExPackData(DataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
570 {
571   PetscMPIInt    local;
572   PetscInt       insert_location;
573   void           *dest;
574   PetscErrorCode ierr;
575 
576   PetscFunctionBegin;
577   if (de->packer_status == DEOBJECT_FINALIZED) {
578     SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
579   }
580   else if (de->packer_status != DEOBJECT_INITIALIZED) {
581     SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
582   }
583   if (!de->send_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DataExPackInitialize() first" );
584   ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr);
585   if (local == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id );
586   if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) {
587     SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
588               (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] );
589     /* don't need this - the catch for too many messages will pick this up. Gives us more info though */
590     if (de->packer_status == DEOBJECT_FINALIZED) {
591       SETERRQ( de->comm, PETSC_ERR_ARG_WRONG, "Cannot insert any more data. DataExPackFinalize() has been called." );
592     }
593   }
594   /* copy memory */
595   insert_location = de->message_offsets[local] + de->pack_cnt[local];
596   dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
597   ierr = PetscMemcpy(dest, data, de->unit_message_size * n);CHKERRQ(ierr);
598   /* increment counter */
599   de->pack_cnt[local] = de->pack_cnt[local] + n;
600   PetscFunctionReturn(0);
601 }
602 
/*
Close phase C.
*) Ensures all data has been packed (pack_cnt must equal messages_to_be_sent
   for every neighbour)
*) Exchanges the send counts with each neighbour to learn the receive counts
*) Allocates and zeroes the receive buffer
On exit the packer is FINALIZED and the communication phase is INITIALIZED,
ready for DataExBegin()/DataExEnd().
*/
#undef __FUNCT__
#define __FUNCT__ "DataExPackFinalize"
PetscErrorCode DataExPackFinalize(DataEx de)
{
  PetscMPIInt i,np;
  PetscInt    total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DataExPackInitialize() first." );
  np = de->n_neighbour_procs;
  for (i = 0; i < np; ++i) {
    if (de->pack_cnt[i] != de->messages_to_be_sent[i]) {
      SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
                (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] );
    }
  }
  /* init */
  for (i = 0; i < np; ++i) {
    de->messages_to_be_recvieved[i] = -1;
  }
  /* figure out the recv counts here: nonblocking count exchange using the
     matched tag pairs from _get_tags(); requests [0,np) are the sends and
     [np,2np) the receives */
  for (i = 0; i < np; ++i) {
    ierr = MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]);CHKERRQ(ierr);
  }
  for (i = 0; i < np; ++i) {
    ierr = MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(2*np, de->_requests, de->_stats);CHKERRQ(ierr);
  /* create space for the data to be recvieved; +1 unit so a zero-length
     exchange still yields a valid pointer */
  total = 0;
  for (i = 0; i < np; ++i) {
    total = total + de->messages_to_be_recvieved[i];
  }
  ierr = PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->recv_message, de->unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to recieve */
  de->recv_message_length = total;
  de->packer_status = DEOBJECT_FINALIZED;
  de->communication_status = DEOBJECT_INITIALIZED;
  PetscFunctionReturn(0);
}
649 
/* do the actual message passing now:
   post the nonblocking sends of the packed data, one per neighbour, into
   request slots [0,np). The matching receives (slots [np,2np)) and the
   MPI_Waitall are issued by DataExEnd(), so computation can overlap the
   communication between the two calls. */
#undef __FUNCT__
#define __FUNCT__ "DataExBegin"
PetscErrorCode DataExBegin(DataEx de)
{
  PetscMPIInt i,np;
  void       *dest;
  PetscInt    length;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
  if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
  if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" );
  if (de->communication_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DataExInitialize() first." );
  if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
  ierr = PetscLogEventBegin(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    /* length in bytes; NOTE(review): held in a PetscInt, which assumes each
       per-neighbour message fits in a PetscInt byte count - confirm for
       very large exchanges */
    length = de->messages_to_be_sent[i] * de->unit_message_size;
    dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
    ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
677 
678 /* do the actual message passing now */
679 #undef __FUNCT__
680 #define __FUNCT__ "DataExEnd"
681 PetscErrorCode DataExEnd(DataEx de)
682 {
683   PetscMPIInt  i,np;
684   PetscInt     total;
685   PetscInt    *message_recv_offsets;
686   void        *dest;
687   PetscInt     length;
688   PetscErrorCode ierr;
689 
690   PetscFunctionBegin;
691   if (de->communication_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DataExInitialize() first." );
692   if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
693   ierr = PetscLogEventBegin(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
694   np = de->n_neighbour_procs;
695   ierr = PetscMalloc1(np+1, &message_recv_offsets);CHKERRQ(ierr);
696   message_recv_offsets[0] = 0;
697   total = de->messages_to_be_recvieved[0];
698   for (i = 1; i < np; ++i) {
699     message_recv_offsets[i] = total;
700     total = total + de->messages_to_be_recvieved[i];
701   }
702   /* == NON BLOCKING == */
703   for (i = 0; i < np; ++i) {
704     length = de->messages_to_be_recvieved[i] * de->unit_message_size;
705     dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
706     ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
707   }
708   ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);
709   ierr = PetscFree(message_recv_offsets);
710   de->communication_status = DEOBJECT_FINALIZED;
711   ierr = PetscLogEventEnd(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "DataExGetSendData"
717 PetscErrorCode DataExGetSendData(DataEx de,PetscInt *length,void **send)
718 {
719   PetscFunctionBegin;
720   if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." );
721   *length = de->send_message_length;
722   *send   = de->send_message;
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "DataExGetRecvData"
728 PetscErrorCode DataExGetRecvData(DataEx de,PetscInt *length,void **recv)
729 {
730   PetscFunctionBegin;
731   if (de->communication_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." );
732   *length = de->recv_message_length;
733   *recv   = de->recv_message;
734   PetscFunctionReturn(0);
735 }
736 
737 #undef __FUNCT__
738 #define __FUNCT__ "DataExTopologyGetNeighbours"
739 PetscErrorCode DataExTopologyGetNeighbours(DataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
740 {
741   PetscFunctionBegin;
742   if (n)     {*n     = de->n_neighbour_procs;}
743   if (neigh) {*neigh = de->neighbour_procs;}
744   PetscFunctionReturn(0);
745 }
746 
747