xref: /petsc/src/tao/leastsquares/tutorials/chwirut2.c (revision bd35522dd00d2ebcbc9f656e2902fae240dc8904)
1 /*
2    Include "petsctao.h" so that we can use TAO solvers.  Note that this
3    file automatically includes libraries such as:
4      petsc.h       - base PETSc routines   petscvec.h - vectors
5      petscsys.h    - system routines        petscmat.h - matrices
6      petscis.h     - index sets            petscksp.h - Krylov subspace methods
7      petscviewer.h - viewers               petscpc.h  - preconditioners
8 
9 */
10 
11 #include <petsctao.h>
12 
13 /*
14 Description:   These data are the result of a NIST study involving
15                ultrasonic calibration.  The response variable is
16                ultrasonic response, and the predictor variable is
17                metal distance.
18 
19 Reference:     Chwirut, D., NIST (197?).
20                Ultrasonic Reference Block Study.
21 */
22 
23 static char help[]="Finds the nonlinear least-squares solution to the model \n\
24             y = exp[-b1*x]/(b2+b3*x)  +  e \n";
25 
26 #define NOBSERVATIONS 214
27 #define NPARAMETERS 3
28 
29 #define DIE_TAG 2000
30 #define IDLE_TAG 1000
31 
32 /* User-defined application context */
33 typedef struct {
34   /* Working space */
35   PetscReal   t[NOBSERVATIONS];   /* array of independent variables of observation */
36   PetscReal   y[NOBSERVATIONS];   /* array of dependent variables */
37   PetscMPIInt size,rank;
38 } AppCtx;
39 
40 /* User provided Routines */
41 PetscErrorCode InitializeData(AppCtx *user);
42 PetscErrorCode FormStartingPoint(Vec);
43 PetscErrorCode EvaluateFunction(Tao, Vec, Vec, void *);
44 PetscErrorCode TaskWorker(AppCtx *user);
45 PetscErrorCode StopWorkers(AppCtx *user);
46 PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal*f, AppCtx *user);
47 
48 /*--------------------------------------------------------------------*/
49 int main(int argc,char **argv)
50 {
51   Vec            x, f;               /* solution, function */
52   Tao            tao;                /* Tao solver context */
53   AppCtx         user;               /* user-defined work context */
54 
55    /* Initialize TAO and PETSc */
56   PetscFunctionBeginUser;
57   PetscCall(PetscInitialize(&argc,&argv,(char *)0,help));
58   MPI_Comm_size(MPI_COMM_WORLD,&user.size);
59   MPI_Comm_rank(MPI_COMM_WORLD,&user.rank);
60   PetscCall(InitializeData(&user));
61 
62   /* Run optimization on rank 0 */
63   if (user.rank == 0) {
64     /* Allocate vectors */
65     PetscCall(VecCreateSeq(PETSC_COMM_SELF,NPARAMETERS,&x));
66     PetscCall(VecCreateSeq(PETSC_COMM_SELF,NOBSERVATIONS,&f));
67 
68     /* TAO code begins here */
69 
70     /* Create TAO solver and set desired solution method */
71     PetscCall(TaoCreate(PETSC_COMM_SELF,&tao));
72     PetscCall(TaoSetType(tao,TAOPOUNDERS));
73 
74     /* Set the function and Jacobian routines. */
75     PetscCall(FormStartingPoint(x));
76     PetscCall(TaoSetSolution(tao,x));
77     PetscCall(TaoSetResidualRoutine(tao,f,EvaluateFunction,(void*)&user));
78 
79     /* Check for any TAO command line arguments */
80     PetscCall(TaoSetFromOptions(tao));
81 
82     /* Perform the Solve */
83     PetscCall(TaoSolve(tao));
84 
85     /* Free TAO data structures */
86     PetscCall(TaoDestroy(&tao));
87 
88     /* Free PETSc data structures */
89     PetscCall(VecDestroy(&x));
90     PetscCall(VecDestroy(&f));
91     StopWorkers(&user);
92   } else {
93     TaskWorker(&user);
94   }
95   PetscCall(PetscFinalize());
96   return 0;
97 }
98 
99 /*--------------------------------------------------------------------*/
100 PetscErrorCode EvaluateFunction(Tao tao, Vec X, Vec F, void *ptr)
101 {
102   AppCtx         *user = (AppCtx *)ptr;
103   PetscInt       i;
104   PetscReal      *x,*f;
105 
106   PetscFunctionBegin;
107   PetscCall(VecGetArray(X,&x));
108   PetscCall(VecGetArray(F,&f));
109   if (user->size == 1) {
110     /* Single processor */
111     for (i=0;i<NOBSERVATIONS;i++) {
112       PetscCall(RunSimulation(x,i,&f[i],user));
113     }
114   } else {
115     /* Multiprocessor main */
116     PetscMPIInt tag;
117     PetscInt    finishedtasks,next_task,checkedin;
118     PetscReal   f_i=0.0;
119     MPI_Status  status;
120 
121     next_task=0;
122     finishedtasks=0;
123     checkedin=0;
124 
125     while (finishedtasks < NOBSERVATIONS || checkedin < user->size-1) {
126       PetscCallMPI(MPI_Recv(&f_i,1,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&status));
127       if (status.MPI_TAG == IDLE_TAG) {
128         checkedin++;
129       } else {
130 
131         tag = status.MPI_TAG;
132         f[tag] = (PetscReal)f_i;
133         finishedtasks++;
134       }
135 
136       if (next_task<NOBSERVATIONS) {
137         PetscCallMPI(MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,next_task,PETSC_COMM_WORLD));
138         next_task++;
139 
140       } else {
141         /* Send idle message */
142         PetscCallMPI(MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,IDLE_TAG,PETSC_COMM_WORLD));
143       }
144     }
145   }
146   PetscCall(VecRestoreArray(X,&x));
147   PetscCall(VecRestoreArray(F,&f));
148   PetscLogFlops(6*NOBSERVATIONS);
149   PetscFunctionReturn(0);
150 }
151 
152 /* ------------------------------------------------------------ */
153 PetscErrorCode FormStartingPoint(Vec X)
154 {
155   PetscReal      *x;
156 
157   PetscFunctionBegin;
158   PetscCall(VecGetArray(X,&x));
159   x[0] = 0.15;
160   x[1] = 0.008;
161   x[2] = 0.010;
162   PetscCall(VecRestoreArray(X,&x));
163   PetscFunctionReturn(0);
164 }
165 
166 /* ---------------------------------------------------------------------- */
167 PetscErrorCode InitializeData(AppCtx *user)
168 {
169   PetscReal *t=user->t,*y=user->y;
170   PetscInt  i=0;
171 
172   PetscFunctionBegin;
173   y[i] =   92.9000;   t[i++] =  0.5000;
174   y[i] =    78.7000;  t[i++] =   0.6250;
175   y[i] =    64.2000;  t[i++] =   0.7500;
176   y[i] =    64.9000;  t[i++] =   0.8750;
177   y[i] =    57.1000;  t[i++] =   1.0000;
178   y[i] =    43.3000;  t[i++] =   1.2500;
179   y[i] =    31.1000;   t[i++] =  1.7500;
180   y[i] =    23.6000;   t[i++] =  2.2500;
181   y[i] =    31.0500;   t[i++] =  1.7500;
182   y[i] =    23.7750;   t[i++] =  2.2500;
183   y[i] =    17.7375;   t[i++] =  2.7500;
184   y[i] =    13.8000;   t[i++] =  3.2500;
185   y[i] =    11.5875;   t[i++] =  3.7500;
186   y[i] =     9.4125;   t[i++] =  4.2500;
187   y[i] =     7.7250;   t[i++] =  4.7500;
188   y[i] =     7.3500;   t[i++] =  5.2500;
189   y[i] =     8.0250;   t[i++] =  5.7500;
190   y[i] =    90.6000;   t[i++] =  0.5000;
191   y[i] =    76.9000;   t[i++] =  0.6250;
192   y[i] =    71.6000;   t[i++] = 0.7500;
193   y[i] =    63.6000;   t[i++] =  0.8750;
194   y[i] =    54.0000;   t[i++] =  1.0000;
195   y[i] =    39.2000;   t[i++] =  1.2500;
196   y[i] =    29.3000;   t[i++] = 1.7500;
197   y[i] =    21.4000;   t[i++] =  2.2500;
198   y[i] =    29.1750;   t[i++] =  1.7500;
199   y[i] =    22.1250;   t[i++] =  2.2500;
200   y[i] =    17.5125;   t[i++] =  2.7500;
201   y[i] =    14.2500;   t[i++] =  3.2500;
202   y[i] =     9.4500;   t[i++] =  3.7500;
203   y[i] =     9.1500;   t[i++] =  4.2500;
204   y[i] =     7.9125;   t[i++] =  4.7500;
205   y[i] =     8.4750;   t[i++] =  5.2500;
206   y[i] =     6.1125;   t[i++] =  5.7500;
207   y[i] =    80.0000;   t[i++] =  0.5000;
208   y[i] =    79.0000;   t[i++] =  0.6250;
209   y[i] =    63.8000;   t[i++] =  0.7500;
210   y[i] =    57.2000;   t[i++] =  0.8750;
211   y[i] =    53.2000;   t[i++] =  1.0000;
212   y[i] =   42.5000;   t[i++] =  1.2500;
213   y[i] =   26.8000;   t[i++] =  1.7500;
214   y[i] =    20.4000;   t[i++] =  2.2500;
215   y[i] =    26.8500;  t[i++] =   1.7500;
216   y[i] =    21.0000;  t[i++] =   2.2500;
217   y[i] =    16.4625;  t[i++] =   2.7500;
218   y[i] =    12.5250;  t[i++] =   3.2500;
219   y[i] =    10.5375;  t[i++] =   3.7500;
220   y[i] =     8.5875;  t[i++] =   4.2500;
221   y[i] =     7.1250;  t[i++] =   4.7500;
222   y[i] =     6.1125;  t[i++] =   5.2500;
223   y[i] =     5.9625;  t[i++] =   5.7500;
224   y[i] =    74.1000;  t[i++] =   0.5000;
225   y[i] =    67.3000;  t[i++] =   0.6250;
226   y[i] =    60.8000;  t[i++] =   0.7500;
227   y[i] =    55.5000;  t[i++] =   0.8750;
228   y[i] =    50.3000;  t[i++] =   1.0000;
229   y[i] =    41.0000;  t[i++] =   1.2500;
230   y[i] =    29.4000;  t[i++] =   1.7500;
231   y[i] =    20.4000;  t[i++] =   2.2500;
232   y[i] =    29.3625;  t[i++] =   1.7500;
233   y[i] =    21.1500;  t[i++] =   2.2500;
234   y[i] =    16.7625;  t[i++] =   2.7500;
235   y[i] =    13.2000;  t[i++] =   3.2500;
236   y[i] =    10.8750;  t[i++] =   3.7500;
237   y[i] =     8.1750;  t[i++] =   4.2500;
238   y[i] =     7.3500;  t[i++] =   4.7500;
239   y[i] =     5.9625;  t[i++] =  5.2500;
240   y[i] =     5.6250;  t[i++] =   5.7500;
241   y[i] =    81.5000;  t[i++] =    .5000;
242   y[i] =    62.4000;  t[i++] =    .7500;
243   y[i] =    32.5000;  t[i++] =   1.5000;
244   y[i] =    12.4100;  t[i++] =   3.0000;
245   y[i] =    13.1200;  t[i++] =   3.0000;
246   y[i] =    15.5600;  t[i++] =   3.0000;
247   y[i] =     5.6300;  t[i++] =   6.0000;
248   y[i] =    78.0000;   t[i++] =   .5000;
249   y[i] =    59.9000;  t[i++] =    .7500;
250   y[i] =    33.2000;  t[i++] =   1.5000;
251   y[i] =    13.8400;  t[i++] =   3.0000;
252   y[i] =    12.7500;  t[i++] =   3.0000;
253   y[i] =    14.6200;  t[i++] =   3.0000;
254   y[i] =     3.9400;  t[i++] =   6.0000;
255   y[i] =    76.8000;  t[i++] =    .5000;
256   y[i] =    61.0000;  t[i++] =    .7500;
257   y[i] =    32.9000;  t[i++] =   1.5000;
258   y[i] =   13.8700;   t[i++] = 3.0000;
259   y[i] =    11.8100;  t[i++] =   3.0000;
260   y[i] =    13.3100;  t[i++] =   3.0000;
261   y[i] =     5.4400;  t[i++] =   6.0000;
262   y[i] =    78.0000;  t[i++] =    .5000;
263   y[i] =    63.5000;  t[i++] =    .7500;
264   y[i] =    33.8000;  t[i++] =   1.5000;
265   y[i] =    12.5600;  t[i++] =   3.0000;
266   y[i] =     5.6300;  t[i++] =   6.0000;
267   y[i] =    12.7500;  t[i++] =   3.0000;
268   y[i] =    13.1200;  t[i++] =   3.0000;
269   y[i] =     5.4400;  t[i++] =   6.0000;
270   y[i] =    76.8000;  t[i++] =    .5000;
271   y[i] =    60.0000;  t[i++] =    .7500;
272   y[i] =    47.8000;  t[i++] =   1.0000;
273   y[i] =    32.0000;  t[i++] =   1.5000;
274   y[i] =    22.2000;  t[i++] =   2.0000;
275   y[i] =    22.5700;  t[i++] =   2.0000;
276   y[i] =    18.8200;  t[i++] =   2.5000;
277   y[i] =    13.9500;  t[i++] =   3.0000;
278   y[i] =    11.2500;  t[i++] =   4.0000;
279   y[i] =     9.0000;  t[i++] =   5.0000;
280   y[i] =     6.6700;  t[i++] =   6.0000;
281   y[i] =    75.8000;  t[i++] =    .5000;
282   y[i] =    62.0000;  t[i++] =    .7500;
283   y[i] =    48.8000;  t[i++] =   1.0000;
284   y[i] =    35.2000;  t[i++] =   1.5000;
285   y[i] =    20.0000;  t[i++] =   2.0000;
286   y[i] =    20.3200;  t[i++] =   2.0000;
287   y[i] =    19.3100;  t[i++] =   2.5000;
288   y[i] =    12.7500;  t[i++] =   3.0000;
289   y[i] =    10.4200;  t[i++] =   4.0000;
290   y[i] =     7.3100;  t[i++] =   5.0000;
291   y[i] =     7.4200;  t[i++] =   6.0000;
292   y[i] =    70.5000;  t[i++] =    .5000;
293   y[i] =    59.5000;  t[i++] =    .7500;
294   y[i] =    48.5000;  t[i++] =   1.0000;
295   y[i] =    35.8000;  t[i++] =   1.5000;
296   y[i] =    21.0000;  t[i++] =   2.0000;
297   y[i] =    21.6700;  t[i++] =   2.0000;
298   y[i] =    21.0000;  t[i++] =   2.5000;
299   y[i] =    15.6400;  t[i++] =   3.0000;
300   y[i] =     8.1700;  t[i++] =   4.0000;
301   y[i] =     8.5500;  t[i++] =   5.0000;
302   y[i] =    10.1200;  t[i++] =   6.0000;
303   y[i] =    78.0000;  t[i++] =    .5000;
304   y[i] =    66.0000;  t[i++] =    .6250;
305   y[i] =    62.0000;  t[i++] =    .7500;
306   y[i] =    58.0000;  t[i++] =    .8750;
307   y[i] =    47.7000;  t[i++] =   1.0000;
308   y[i] =    37.8000;  t[i++] =   1.2500;
309   y[i] =    20.2000;  t[i++] =   2.2500;
310   y[i] =    21.0700;  t[i++] =   2.2500;
311   y[i] =    13.8700;  t[i++] =   2.7500;
312   y[i] =     9.6700;  t[i++] =   3.2500;
313   y[i] =     7.7600;  t[i++] =   3.7500;
314   y[i] =    5.4400;   t[i++] =  4.2500;
315   y[i] =    4.8700;   t[i++] =  4.7500;
316   y[i] =     4.0100;  t[i++] =   5.2500;
317   y[i] =     3.7500;  t[i++] =   5.7500;
318   y[i] =    24.1900;  t[i++] =   3.0000;
319   y[i] =    25.7600;  t[i++] =   3.0000;
320   y[i] =    18.0700;  t[i++] =   3.0000;
321   y[i] =    11.8100;  t[i++] =   3.0000;
322   y[i] =    12.0700;  t[i++] =   3.0000;
323   y[i] =    16.1200;  t[i++] =   3.0000;
324   y[i] =    70.8000;  t[i++] =    .5000;
325   y[i] =    54.7000;  t[i++] =    .7500;
326   y[i] =    48.0000;  t[i++] =   1.0000;
327   y[i] =    39.8000;  t[i++] =   1.5000;
328   y[i] =    29.8000;  t[i++] =   2.0000;
329   y[i] =    23.7000;  t[i++] =   2.5000;
330   y[i] =    29.6200;  t[i++] =   2.0000;
331   y[i] =    23.8100;  t[i++] =   2.5000;
332   y[i] =    17.7000;  t[i++] =   3.0000;
333   y[i] =    11.5500;  t[i++] =   4.0000;
334   y[i] =    12.0700;  t[i++] =   5.0000;
335   y[i] =     8.7400;  t[i++] =   6.0000;
336   y[i] =    80.7000;  t[i++] =    .5000;
337   y[i] =    61.3000;  t[i++] =    .7500;
338   y[i] =    47.5000;  t[i++] =   1.0000;
339    y[i] =   29.0000;  t[i++] =   1.5000;
340    y[i] =   24.0000;  t[i++] =   2.0000;
341   y[i] =    17.7000;  t[i++] =   2.5000;
342   y[i] =    24.5600;  t[i++] =   2.0000;
343   y[i] =    18.6700;  t[i++] =   2.5000;
344    y[i] =   16.2400;  t[i++] =   3.0000;
345   y[i] =     8.7400;  t[i++] =   4.0000;
346   y[i] =     7.8700;  t[i++] =   5.0000;
347   y[i] =     8.5100;  t[i++] =   6.0000;
348   y[i] =    66.7000;  t[i++] =    .5000;
349   y[i] =    59.2000;  t[i++] =    .7500;
350   y[i] =    40.8000;  t[i++] =   1.0000;
351   y[i] =    30.7000;  t[i++] =   1.5000;
352   y[i] =    25.7000;  t[i++] =   2.0000;
353   y[i] =    16.3000;  t[i++] =   2.5000;
354   y[i] =    25.9900;  t[i++] =   2.0000;
355   y[i] =    16.9500;  t[i++] =   2.5000;
356   y[i] =    13.3500;  t[i++] =   3.0000;
357   y[i] =     8.6200;  t[i++] =   4.0000;
358   y[i] =     7.2000;  t[i++] =   5.0000;
359   y[i] =     6.6400;  t[i++] =   6.0000;
360   y[i] =    13.6900;  t[i++] =   3.0000;
361   y[i] =    81.0000;  t[i++] =    .5000;
362   y[i] =    64.5000;  t[i++] =    .7500;
363   y[i] =    35.5000;  t[i++] =   1.5000;
364    y[i] =   13.3100;  t[i++] =   3.0000;
365   y[i] =     4.8700;  t[i++] =   6.0000;
366   y[i] =    12.9400;  t[i++] =   3.0000;
367   y[i] =     5.0600;  t[i++] =   6.0000;
368   y[i] =    15.1900;  t[i++] =   3.0000;
369   y[i] =    14.6200;  t[i++] =   3.0000;
370   y[i] =    15.6400;  t[i++] =   3.0000;
371   y[i] =    25.5000;  t[i++] =   1.7500;
372   y[i] =    25.9500;  t[i++] =   1.7500;
373   y[i] =    81.7000;  t[i++] =    .5000;
374   y[i] =    61.6000;  t[i++] =    .7500;
375   y[i] =    29.8000;  t[i++] =   1.7500;
376   y[i] =    29.8100;  t[i++] =   1.7500;
377   y[i] =    17.1700;  t[i++] =   2.7500;
378   y[i] =    10.3900;  t[i++] =   3.7500;
379   y[i] =    28.4000;  t[i++] =   1.7500;
380   y[i] =    28.6900;  t[i++] =   1.7500;
381   y[i] =    81.3000;  t[i++] =    .5000;
382   y[i] =    60.9000;  t[i++] =    .7500;
383   y[i] =    16.6500;  t[i++] =   2.7500;
384   y[i] =    10.0500;  t[i++] =   3.7500;
385   y[i] =    28.9000;  t[i++] =   1.7500;
386   y[i] =    28.9500;  t[i++] =   1.7500;
387   PetscFunctionReturn(0);
388 }
389 
390 PetscErrorCode TaskWorker(AppCtx *user)
391 {
392   PetscReal      x[NPARAMETERS],f = 0.0;
393   PetscMPIInt    tag=IDLE_TAG;
394   PetscInt       index;
395   MPI_Status     status;
396 
397   PetscFunctionBegin;
398   /* Send check-in message to rank-0 */
399 
400   PetscCallMPI(MPI_Send(&f,1,MPIU_REAL,0,IDLE_TAG,PETSC_COMM_WORLD));
401   while (tag != DIE_TAG) {
402     PetscCallMPI(MPI_Recv(x,NPARAMETERS,MPIU_REAL,0,MPI_ANY_TAG,PETSC_COMM_WORLD,&status));
403     tag = status.MPI_TAG;
404     if (tag == IDLE_TAG) {
405       PetscCallMPI(MPI_Send(&f,1,MPIU_REAL,0,IDLE_TAG,PETSC_COMM_WORLD));
406     } else if (tag != DIE_TAG) {
407       index = (PetscInt)tag;
408       PetscCall(RunSimulation(x,index,&f,user));
409       PetscCallMPI(MPI_Send(&f,1,MPIU_REAL,0,tag,PETSC_COMM_WORLD));
410     }
411   }
412   PetscFunctionReturn(0);
413 }
414 
415 PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal*f, AppCtx *user)
416 {
417   PetscReal *t = user->t;
418   PetscReal *y = user->y;
419 #if defined(PETSC_USE_REAL_SINGLE)
420   *f = y[i] - exp(-x[0]*t[i])/(x[1] + x[2]*t[i]); /* expf() for single-precision breaks this example on Freebsd, Valgrind errors on Linux */
421 #else
422   *f = y[i] - PetscExpScalar(-x[0]*t[i])/(x[1] + x[2]*t[i]);
423 #endif
424   return(0);
425 }
426 
427 PetscErrorCode StopWorkers(AppCtx *user)
428 {
429   PetscInt       checkedin;
430   MPI_Status     status;
431   PetscReal      f,x[NPARAMETERS];
432 
433   PetscFunctionBegin;
434   checkedin=0;
435   while (checkedin < user->size-1) {
436     PetscCallMPI(MPI_Recv(&f,1,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&status));
437     checkedin++;
438     PetscCall(PetscArrayzero(x,NPARAMETERS));
439     PetscCallMPI(MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,DIE_TAG,PETSC_COMM_WORLD));
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 /*TEST
445 
446    build:
447       requires: !complex
448 
449    test:
450       nsize: 3
451       requires: !single
452       args: -tao_smonitor -tao_max_it 100 -tao_type pounders -tao_gatol 1.e-5
453 
454 TEST*/
455