/*
Build a few basic tools to help with partitioned domains.

1)
On each processor, have a DomainExchangerTopology.
This is a doubly-connected edge list which enumerates the
communication paths between connected processors. By numbering
these paths we can always uniquely assign message identifiers.

        edge
         10
proc  --------->  proc
 0    <--------    1
         11
        twin

E.g. proc 0 sends to proc 1 with message id 10. To receive the correct
message, proc 1 looks for the edge connected to proc 0; the
message id then comes from the twin of that edge.

2)
A DomainExchangerArrayPacker.
A little function which, given a piece of data, will memcpy the data into
the correct place within an array (which will be sent to other procs).

On proc 1 we send data to procs 0, 2 and 3. The data may have different lengths.
All data gets jammed into a single array, so each piece must be "jammed" into the correct location.
The Packer knows how much is going to each processor and keeps track of the inserts,
so as to avoid ever packing TOO much into one slot and inevitably corrupting some memory.

data to 0    data to 2       data to 3

|--------|-----------------|--|

Users have to unpack the message themselves. We can provide the pointer for each
entry, but you'll have to cast it to the appropriate data type.

Phase A: Build topology

Phase B: Define message lengths

Phase C: Pack data

Phase D: Send data

//
DataExCreate()
// A
DataExTopologyInitialize()
DataExTopologyAddNeighbour()
DataExTopologyAddNeighbour()
DataExTopologyFinalize()
// B
DataExInitializeSendCount()
DataExAddToSendCount()
DataExAddToSendCount()
DataExAddToSendCount()
DataExFinalizeSendCount()
// C
DataExPackInitialize()
DataExPackData()
DataExPackData()
DataExPackFinalize()
// D
DataExBegin()
// ... perform any calculations ... //
DataExEnd()

// Call any getters //

*/
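
/*
Usage sketch (a minimal, illustrative example; not compiled as part of this
file, and the helper name ring_exchange and its PetscInt payload are invented
here): each rank exchanges one unit with the ranks immediately before and
after it in the communicator, walking through phases A-D in order.
*/
#if 0
static PetscErrorCode ring_exchange(MPI_Comm comm)
{
  DataEx         de;
  PetscMPIInt    rank,size,prev,next;
  PetscInt       payload,nrecv;
  PetscInt       *recv;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  prev = (rank + size - 1) % size;
  next = (rank + 1) % size;

  ierr = DataExCreate(comm,0,&de);CHKERRQ(ierr);
  /* Phase A: declare the communication graph */
  ierr = DataExTopologyInitialize(de);CHKERRQ(ierr);
  ierr = DataExTopologyAddNeighbour(de,prev);CHKERRQ(ierr);
  ierr = DataExTopologyAddNeighbour(de,next);CHKERRQ(ierr);
  ierr = DataExTopologyFinalize(de);CHKERRQ(ierr);
  /* Phase B: one unit to each neighbour */
  ierr = DataExInitializeSendCount(de);CHKERRQ(ierr);
  ierr = DataExAddToSendCount(de,prev,1);CHKERRQ(ierr);
  ierr = DataExAddToSendCount(de,next,1);CHKERRQ(ierr);
  ierr = DataExFinalizeSendCount(de);CHKERRQ(ierr);
  /* Phase C: pack the payload for each neighbour */
  payload = (PetscInt)rank;
  ierr = DataExPackInitialize(de,sizeof(PetscInt));CHKERRQ(ierr);
  ierr = DataExPackData(de,prev,1,&payload);CHKERRQ(ierr);
  ierr = DataExPackData(de,next,1,&payload);CHKERRQ(ierr);
  ierr = DataExPackFinalize(de);CHKERRQ(ierr);
  /* Phase D: exchange; calculations may overlap the communication */
  ierr = DataExBegin(de);CHKERRQ(ierr);
  ierr = DataExEnd(de);CHKERRQ(ierr);
  /* unpack: the caller casts the raw buffer; use it before destroying de */
  ierr = DataExGetRecvData(de,&nrecv,(void**)&recv);CHKERRQ(ierr);
  ierr = DataExDestroy(de);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif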

#include <petsc.h>
#include <petscvec.h>
#include <petscmat.h>

#include "data_ex.h"

const char *status_names[] = {"initialized", "finalized", "unknown"};

PetscLogEvent PTATIN_DataExchangerTopologySetup;
PetscLogEvent PTATIN_DataExchangerBegin;
PetscLogEvent PTATIN_DataExchangerEnd;

#undef __FUNCT__
#define __FUNCT__ "DataExCreate"
PetscErrorCode DataExCreate(MPI_Comm comm,const PetscInt count, DataEx *ex)
{
  PetscErrorCode ierr;
  DataEx         d;

  PetscFunctionBegin;
  ierr = PetscMalloc(sizeof(struct _p_DataEx), &d);CHKERRQ(ierr);
  ierr = PetscMemzero(d, sizeof(struct _p_DataEx));CHKERRQ(ierr);
  ierr = MPI_Comm_dup(comm,&d->comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(d->comm,&d->rank);CHKERRQ(ierr);

  d->instance = count;

  d->topology_status        = DEOBJECT_STATE_UNKNOWN;
  d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
  d->packer_status          = DEOBJECT_STATE_UNKNOWN;
  d->communication_status   = DEOBJECT_STATE_UNKNOWN;

  d->n_neighbour_procs = -1;
  d->neighbour_procs   = NULL;

  d->messages_to_be_sent      = NULL;
  d->message_offsets          = NULL;
  d->messages_to_be_recvieved = NULL;

  d->unit_message_size   = -1;
  d->send_message        = NULL;
  d->send_message_length = -1;
  d->recv_message        = NULL;
  d->recv_message_length = -1;
  d->total_pack_cnt      = -1;
  d->pack_cnt            = NULL;

  d->send_tags = NULL;
  d->recv_tags = NULL;

  d->_stats    = NULL;
  d->_requests = NULL;
  *ex = d;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExView"
PetscErrorCode DataExView(DataEx d)
{
  PetscMPIInt p;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscPrintf( PETSC_COMM_WORLD, "DataEx: instance=%D\n",d->instance);CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status]);CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status] );CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  packer status:          %s \n", status_names[d->packer_status] );CHKERRQ(ierr);
  ierr = PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status] );CHKERRQ(ierr);

  if (d->topology_status == DEOBJECT_FINALIZED) {
    ierr = PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");CHKERRQ(ierr);
    ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] neighbours: %d \n", (int)d->rank, (int)d->n_neighbour_procs );CHKERRQ(ierr);
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d]   neighbour[%d] = %d \n", (int)d->rank, (int)p, (int)d->neighbour_procs[p]);CHKERRQ(ierr);
    }
  }
  if (d->message_lengths_status == DEOBJECT_FINALIZED) {
    ierr = PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");CHKERRQ(ierr);
    ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] atomic size: %ld \n", (int)d->rank, (long int)d->unit_message_size );CHKERRQ(ierr);
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", (int)d->rank, d->messages_to_be_sent[p], d->send_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
    }
    for (p=0; p<d->n_neighbour_procs; p++) {
      ierr = PetscPrintf( PETSC_COMM_SELF, "    [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", (int)d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], (int)d->neighbour_procs[p] );CHKERRQ(ierr);
    }
  }
  if (d->packer_status == DEOBJECT_FINALIZED) {}
  if (d->communication_status == DEOBJECT_FINALIZED) {}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExDestroy"
PetscErrorCode DataExDestroy(DataEx d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr);
  if (d->neighbour_procs) {ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);}
  if (d->messages_to_be_sent) {ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);}
  if (d->message_offsets) {ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);}
  if (d->messages_to_be_recvieved) {ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);}
  if (d->send_message) {ierr = PetscFree(d->send_message);CHKERRQ(ierr);}
  if (d->recv_message) {ierr = PetscFree(d->recv_message);CHKERRQ(ierr);}
  if (d->pack_cnt) {ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);}
  if (d->send_tags) {ierr = PetscFree(d->send_tags);CHKERRQ(ierr);}
  if (d->recv_tags) {ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);}
  if (d->_stats) {ierr = PetscFree(d->_stats);CHKERRQ(ierr);}
  if (d->_requests) {ierr = PetscFree(d->_requests);CHKERRQ(ierr);}
  ierr = PetscFree(d);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* === Phase A === */

#undef __FUNCT__
#define __FUNCT__ "DataExTopologyInitialize"
PetscErrorCode DataExTopologyInitialize(DataEx d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  d->topology_status = DEOBJECT_INITIALIZED;
  d->n_neighbour_procs = 0;
  ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
  ierr = PetscFree(d->messages_to_be_sent);CHKERRQ(ierr);
  ierr = PetscFree(d->message_offsets);CHKERRQ(ierr);
  ierr = PetscFree(d->messages_to_be_recvieved);CHKERRQ(ierr);
  ierr = PetscFree(d->pack_cnt);CHKERRQ(ierr);
  ierr = PetscFree(d->send_tags);CHKERRQ(ierr);
  ierr = PetscFree(d->recv_tags);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExTopologyAddNeighbour"
PetscErrorCode DataExTopologyAddNeighbour(DataEx d,const PetscMPIInt proc_id)
{
  PetscMPIInt    n,found;
  PetscMPIInt    nproc;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->topology_status == DEOBJECT_FINALIZED) {
    SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DataExTopologyInitialize() first");
  } else if (d->topology_status != DEOBJECT_INITIALIZED) {
    SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialized. Call DataExTopologyInitialize() first");
  }
  /* error on negative entries */
  if (proc_id < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
  /* error on ranks larger than number of procs in communicator */
  ierr = MPI_Comm_size(d->comm,&nproc);CHKERRQ(ierr);
  if (proc_id >= nproc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank >= nproc");
  /* check whether proc_id is already in the list */
  found = 0;
  for (n = 0; n < d->n_neighbour_procs; n++) {
    if (d->neighbour_procs[n] == proc_id) {
      found = 1;
    }
  }
  if (found == 0) { /* add it to the list, growing the allocation by one entry */
    PetscMPIInt *tmp;

    ierr = PetscMalloc1(d->n_neighbour_procs+1, &tmp);CHKERRQ(ierr);
    if (d->n_neighbour_procs) {
      ierr = PetscMemcpy(tmp, d->neighbour_procs, sizeof(PetscMPIInt)*d->n_neighbour_procs);CHKERRQ(ierr);
      ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
    }
    d->neighbour_procs = tmp;
    d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
    d->n_neighbour_procs++;
  }
  PetscFunctionReturn(0);
}

/*
counter: the index of the communication object
N: the number of processors
r0: rank of sender
r1: rank of receiver

procs = { 0, 1, 2, 3 }

0 ==> 0   e=0
0 ==> 1   e=1
0 ==> 2   e=2
0 ==> 3   e=3

1 ==> 0   e=4
1 ==> 1   e=5
1 ==> 2   e=6
1 ==> 3   e=7

2 ==> 0   e=8
2 ==> 1   e=9
2 ==> 2   e=10
2 ==> 3   e=11

3 ==> 0   e=12
3 ==> 1   e=13
3 ==> 2   e=14
3 ==> 3   e=15

If we require that proc A sends to proc B, then the SEND tag index will be given by
  N * rank(A) + rank(B) + offset
If we require that proc A will receive from proc B, then the RECV tag index will be given by
  N * rank(B) + rank(A) + offset

Here offset = N * N * counter, which keeps the tags of different DataEx instances disjoint.
*/
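/*
Worked example (illustrative): N = 4 ranks, counter = 0.
Rank 2 sending to rank 3 computes st = 4*2 + 3 = 11; rank 3, receiving from
neighbour 2, computes rt = 4*2 + 3 = 11, so the tags on both ends match.
*/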
static void _get_tags(PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt)
{
  PetscMPIInt st,rt;

  st = N*r0 + r1   +   N*N*counter;
  rt = N*r1 + r0   +   N*N*counter;
  *_st = st;
  *_rt = rt;
}

/*
Makes the communication map symmetric: if rank i lists rank j as a neighbour,
ensure rank j also lists rank i. The map is encoded as a size-by-size sparse
matrix; each rank inserts entries (rank, neighbour) for its outgoing edges and
entries (neighbour, rank) to force the transposed edges to exist. After
assembly, row `rank` holds the symmetrized neighbour list for this rank.
*/
#undef __FUNCT__
#define __FUNCT__ "_DataExCompleteCommunicationMap"
PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
{
  Mat               A;
  PetscInt          i,j,nc;
  PetscInt          n_, *proc_neighbours_;
  PetscInt          rank_i_;
  PetscMPIInt       size,  rank_i;
  PetscScalar       *vals;
  const PetscInt    *cols;
  const PetscScalar *red_vals;
  PetscMPIInt       _n_new, *_proc_neighbours_new;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  n_ = n;
  ierr = PetscMalloc(sizeof(PetscInt) * n_, &proc_neighbours_);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    proc_neighbours_[i] = proc_neighbours[i];
  }
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr);
  rank_i_ = rank_i;

  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(A,1,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,n_,NULL,n_,NULL);CHKERRQ(ierr);
  ierr = MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);CHKERRQ(ierr);
  /* Build original map */
  ierr = PetscMalloc1(n_, &vals);CHKERRQ(ierr);
  for (i = 0; i < n_; ++i) {
    vals[i] = 1.0;
  }
  ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  /* Now force all other connections if they are not already there */
  /* It's more efficient to do them all at once */
  for (i = 0; i < n_; ++i) {
    vals[i] = 2.0;
  }
  ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
/*
  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
  if ((n_new != NULL) && (proc_neighbours_new != NULL)) {
    ierr = MatGetRow(A, rank_i_, &nc, &cols, &red_vals);CHKERRQ(ierr);
    _n_new = (PetscMPIInt) nc;
    ierr = PetscMalloc1(_n_new, &_proc_neighbours_new);CHKERRQ(ierr);
    for (j = 0; j < nc; ++j) {
      _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
    }
    ierr = MatRestoreRow( A, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);
    *n_new               = (PetscMPIInt)_n_new;
    *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
  }
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);
  ierr = MPI_Barrier(comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
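
/*
Example (illustrative): on 2 ranks, if only rank 0 calls
DataExTopologyAddNeighbour(de,1), the map is one-sided. After
_DataExCompleteCommunicationMap() both ranks report one neighbour,
rank 0 gets {1} and rank 1 gets {0}, so matching sends/recvs can be posted.
*/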

#undef __FUNCT__
#define __FUNCT__ "DataExTopologyFinalize"
PetscErrorCode DataExTopologyFinalize(DataEx d)
{
  PetscMPIInt    symm_nn;
  PetscMPIInt   *symm_procs;
  PetscMPIInt    r0,n,st,rt;
  PetscMPIInt    nprocs;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->topology_status != DEOBJECT_INITIALIZED) {
    SETERRQ(d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialized. Call DataExTopologyInitialize() first");
  }
  ierr = PetscLogEventBegin(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
  /* given information about all my neighbours, make the map symmetric */
  ierr = _DataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr);
  /* update my arrays */
  ierr = PetscFree(d->neighbour_procs);CHKERRQ(ierr);
  d->n_neighbour_procs = symm_nn;
  d->neighbour_procs   = symm_procs;
  /* allocate memory */
  if (!d->messages_to_be_sent) {ierr = PetscMalloc1(d->n_neighbour_procs, &d->messages_to_be_sent);CHKERRQ(ierr);}
  if (!d->message_offsets) {ierr = PetscMalloc1(d->n_neighbour_procs, &d->message_offsets);CHKERRQ(ierr);}
  if (!d->messages_to_be_recvieved) {ierr = PetscMalloc1(d->n_neighbour_procs, &d->messages_to_be_recvieved);CHKERRQ(ierr);}
  if (!d->pack_cnt) {ierr = PetscMalloc(sizeof(PetscInt) * d->n_neighbour_procs, &d->pack_cnt);CHKERRQ(ierr);}
  if (!d->_stats) {ierr = PetscMalloc(sizeof(MPI_Status) * 2*d->n_neighbour_procs, &d->_stats);CHKERRQ(ierr);}
  if (!d->_requests) {ierr = PetscMalloc(sizeof(MPI_Request) * 2*d->n_neighbour_procs, &d->_requests);CHKERRQ(ierr);}
  if (!d->send_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->send_tags);CHKERRQ(ierr);}
  if (!d->recv_tags) {ierr = PetscMalloc(sizeof(int) * d->n_neighbour_procs, &d->recv_tags);CHKERRQ(ierr);}
  /* compute message tags */
  ierr = MPI_Comm_size(d->comm,&nprocs);CHKERRQ(ierr);
  r0 = d->rank;
  for (n = 0; n < d->n_neighbour_procs; ++n) {
    PetscMPIInt r1 = d->neighbour_procs[n];

    _get_tags( d->instance, nprocs, r0,r1, &st, &rt );
    d->send_tags[n] = (int)st;
    d->recv_tags[n] = (int)rt;
  }
  d->topology_status = DEOBJECT_FINALIZED;
  ierr = PetscLogEventEnd(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* === Phase B === */
#undef __FUNCT__
#define __FUNCT__ "_DataExConvertProcIdToLocalIndex"
PetscErrorCode _DataExConvertProcIdToLocalIndex(DataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
{
  PetscMPIInt i,np;

  PetscFunctionBegin;
  np = de->n_neighbour_procs;
  *local = -1;
  for (i = 0; i < np; ++i) {
    if (proc_id == de->neighbour_procs[i]) {
      *local = i;
      break;
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExInitializeSendCount"
PetscErrorCode DataExInitializeSendCount(DataEx de)
{
  PetscMPIInt i;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ(de->comm, PETSC_ERR_ORDER, "Topology not finalized");
  de->message_lengths_status = DEOBJECT_INITIALIZED;
  for (i = 0; i < de->n_neighbour_procs; ++i) {
    de->messages_to_be_sent[i] = 0;
  }
  PetscFunctionReturn(0);
}

/*
Only allows counters to be set for neighbouring CPUs.
*/
#undef __FUNCT__
#define __FUNCT__ "DataExAddToSendCount"
PetscErrorCode DataExAddToSendCount(DataEx de,const PetscMPIInt proc_id,const PetscInt count)
{
  PetscMPIInt    local_val;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->message_lengths_status == DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DataExInitializeSendCount() first" );
  } else if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
  }
  ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr);
  if (local_val == -1) {
    SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id );
  }
  de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExFinalizeSendCount"
PetscErrorCode DataExFinalizeSendCount(DataEx de)
{
  PetscFunctionBegin;
  if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
  }
  de->message_lengths_status = DEOBJECT_FINALIZED;
  PetscFunctionReturn(0);
}

/* === Phase C === */
/*
 * Resets the expected receive counts and frees the send/recv buffers,
 * ready for the packer to be (re)initialized.
*/
#undef __FUNCT__
#define __FUNCT__ "_DataExInitializeTmpStorage"
PetscErrorCode _DataExInitializeTmpStorage(DataEx de)
{
  PetscMPIInt    i, np;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->n_neighbour_procs < 0) {
    SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0");
  }
  if (!de->neighbour_procs) {
    SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" );
  }
  np = de->n_neighbour_procs;
  for (i = 0; i < np; ++i) {
    /* de->messages_to_be_sent[i] = -1; */
    de->messages_to_be_recvieved[i] = -1;
  }
  ierr = PetscFree(de->send_message);CHKERRQ(ierr);
  ierr = PetscFree(de->recv_message);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
*) Zeros out pack data counters
*) Ensures message length is set
*) Checks send counts are properly initialized
*) Allocates space for the packed data
*/
#undef __FUNCT__
#define __FUNCT__ "DataExPackInitialize"
PetscErrorCode DataExPackInitialize(DataEx de,size_t unit_message_size)
{
  PetscMPIInt    i,np;
  PetscInt       total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
  if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
  de->packer_status = DEOBJECT_INITIALIZED;
  ierr = _DataExInitializeTmpStorage(de);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  de->unit_message_size = unit_message_size;
  total = 0;
  for (i = 0; i < np; ++i) {
    if (de->messages_to_be_sent[i] == -1) {
      PetscMPIInt proc_neighbour = de->neighbour_procs[i];
      SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "messages_to_be_sent[neighbour_proc=%d] is un-initialized. Call DataExInitializeSendCount() first", (int)proc_neighbour );
    }
    total = total + de->messages_to_be_sent[i];
  }
  /* create space for the data to be sent */
  ierr = PetscMalloc(unit_message_size * (total + 1), &de->send_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->send_message, unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to send */
  de->send_message_length = total;
  de->message_offsets[0] = 0;
  total = de->messages_to_be_sent[0];
  for (i = 1; i < np; ++i) {
    de->message_offsets[i] = total;
    total = total + de->messages_to_be_sent[i];
  }
  /* init the packer counters */
  de->total_pack_cnt = 0;
  for (i = 0; i < np; ++i) {
    de->pack_cnt[i] = 0;
  }
  PetscFunctionReturn(0);
}
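
/*
Example (illustrative): with three neighbours and send counts {3, 5, 2}, the
packed send buffer holds 10 units and message_offsets = {0, 3, 8}. A unit
packed for neighbour i lands at byte offset
(message_offsets[i] + pack_cnt[i]) * unit_message_size.
*/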

/*
*) Ensures data gets packed appropriately and no overlaps occur
*/
#undef __FUNCT__
#define __FUNCT__ "DataExPackData"
PetscErrorCode DataExPackData(DataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
{
  PetscMPIInt    local;
  PetscInt       insert_location;
  void           *dest;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status == DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
  }
  else if (de->packer_status != DEOBJECT_INITIALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
  }
  if (!de->send_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DataExPackInitialize() first" );
  ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr);
  if (local == -1) SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not a registered neighbour", (int)proc_id );
  if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) {
    SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
              (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] );
  }
  /* copy memory */
  insert_location = de->message_offsets[local] + de->pack_cnt[local];
  dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
  ierr = PetscMemcpy(dest, data, de->unit_message_size * n);CHKERRQ(ierr);
  /* increment counter */
  de->pack_cnt[local] = de->pack_cnt[local] + n;
  PetscFunctionReturn(0);
}

/*
*) Ensures all data has been packed
*/
#undef __FUNCT__
#define __FUNCT__ "DataExPackFinalize"
PetscErrorCode DataExPackFinalize(DataEx de)
{
  PetscMPIInt i,np;
  PetscInt    total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DataExPackInitialize() first." );
  np = de->n_neighbour_procs;
  for (i = 0; i < np; ++i) {
    if (de->pack_cnt[i] != de->messages_to_be_sent[i]) {
      SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
                (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] );
    }
  }
  /* init */
  for (i = 0; i < np; ++i) {
    de->messages_to_be_recvieved[i] = -1;
  }
  /* exchange the send counts so each rank learns its receive counts */
  for (i = 0; i < np; ++i) {
    ierr = MPI_Isend(&de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i]);CHKERRQ(ierr);
  }
  for (i = 0; i < np; ++i) {
    ierr = MPI_Irecv(&de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i]);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(2*np, de->_requests, de->_stats);CHKERRQ(ierr);
  /* create space for the data to be received */
  total = 0;
  for (i = 0; i < np; ++i) {
    total = total + de->messages_to_be_recvieved[i];
  }
  ierr = PetscMalloc(de->unit_message_size * (total + 1), &de->recv_message);CHKERRQ(ierr);
  /* initialize memory */
  ierr = PetscMemzero(de->recv_message, de->unit_message_size * (total + 1));CHKERRQ(ierr);
  /* set total items to receive */
  de->recv_message_length = total;
  de->packer_status = DEOBJECT_FINALIZED;
  de->communication_status = DEOBJECT_INITIALIZED;
  PetscFunctionReturn(0);
}

/* do the actual message passing now */
#undef __FUNCT__
#define __FUNCT__ "DataExBegin"
PetscErrorCode DataExBegin(DataEx de)
{
  PetscMPIInt i,np;
  void       *dest;
  PetscInt    length;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
  if (de->message_lengths_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
  if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" );
  if (de->communication_status == DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. To reuse the DataEx object, call DataExPackInitialize() first." );
  if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
  ierr = PetscLogEventBegin(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    length = de->messages_to_be_sent[i] * de->unit_message_size;
    dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
    ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* do the actual message passing now */
#undef __FUNCT__
#define __FUNCT__ "DataExEnd"
PetscErrorCode DataExEnd(DataEx de)
{
  PetscMPIInt  i,np;
  PetscInt     total;
  PetscInt    *message_recv_offsets;
  void        *dest;
  PetscInt     length;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->communication_status != DEOBJECT_INITIALIZED) SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DataExPackFinalize() first." );
  if (!de->recv_message) SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
  ierr = PetscLogEventBegin(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
  np = de->n_neighbour_procs;
  ierr = PetscMalloc1(np, &message_recv_offsets);CHKERRQ(ierr);
  message_recv_offsets[0] = 0;
  total = de->messages_to_be_recvieved[0];
  for (i = 1; i < np; ++i) {
    message_recv_offsets[i] = total;
    total = total + de->messages_to_be_recvieved[i];
  }
  /* == NON BLOCKING == */
  for (i = 0; i < np; ++i) {
    length = de->messages_to_be_recvieved[i] * de->unit_message_size;
    dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
    ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
  }
  ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);
  ierr = PetscFree(message_recv_offsets);CHKERRQ(ierr);
  de->communication_status = DEOBJECT_FINALIZED;
  ierr = PetscLogEventEnd(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExGetSendData"
PetscErrorCode DataExGetSendData(DataEx de,PetscInt *length,void **send)
{
  PetscFunctionBegin;
  if (de->packer_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." );
  *length = de->send_message_length;
  *send   = de->send_message;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExGetRecvData"
PetscErrorCode DataExGetRecvData(DataEx de,PetscInt *length,void **recv)
{
  PetscFunctionBegin;
  if (de->communication_status != DEOBJECT_FINALIZED) SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." );
  *length = de->recv_message_length;
  *recv   = de->recv_message;
  PetscFunctionReturn(0);
}
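
/*
Unpacking sketch (illustrative): the receive buffer is a flat array of
recv_message_length units; the caller casts it to the unit type that was
packed. MyUnit stands in for whatever type was passed to DataExPackData(),
and de/ierr are assumed to be in scope.

  PetscInt n,i;
  MyUnit   *vals;
  ierr = DataExGetRecvData(de,&n,(void**)&vals);CHKERRQ(ierr);
  for (i = 0; i < n; i++) {
    ... process vals[i] ...
  }
*/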

#undef __FUNCT__
#define __FUNCT__ "DataExTopologyGetNeighbours"
PetscErrorCode DataExTopologyGetNeighbours(DataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
{
  PetscFunctionBegin;
  if (n)     {*n     = de->n_neighbour_procs;}
  if (neigh) {*neigh = de->neighbour_procs;}
  PetscFunctionReturn(0);
}