1 /* 2 Include "petsctao.h" so that we can use TAO solvers. Note that this 3 file automatically includes libraries such as: 4 petsc.h - base PETSc routines petscvec.h - vectors 5 petscsys.h - system routines petscmat.h - matrices 6 petscis.h - index sets petscksp.h - Krylov subspace methods 7 petscviewer.h - viewers petscpc.h - preconditioners 8 9 */ 10 11 #include <petsctao.h> 12 13 /* 14 Description: These data are the result of a NIST study involving 15 ultrasonic calibration. The response variable is 16 ultrasonic response, and the predictor variable is 17 metal distance. 18 19 Reference: Chwirut, D., NIST (197?). 20 Ultrasonic Reference Block Study. 21 */ 22 23 static char help[]="Finds the nonlinear least-squares solution to the model \n\ 24 y = exp[-b1*x]/(b2+b3*x) + e \n"; 25 26 #define NOBSERVATIONS 214 27 #define NPARAMETERS 3 28 29 #define DIE_TAG 2000 30 #define IDLE_TAG 1000 31 32 /* User-defined application context */ 33 typedef struct { 34 /* Working space */ 35 PetscReal t[NOBSERVATIONS]; /* array of independent variables of observation */ 36 PetscReal y[NOBSERVATIONS]; /* array of dependent variables */ 37 PetscMPIInt size,rank; 38 } AppCtx; 39 40 /* User provided Routines */ 41 PetscErrorCode InitializeData(AppCtx *user); 42 PetscErrorCode FormStartingPoint(Vec); 43 PetscErrorCode EvaluateFunction(Tao, Vec, Vec, void *); 44 PetscErrorCode TaskWorker(AppCtx *user); 45 PetscErrorCode StopWorkers(AppCtx *user); 46 PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal*f, AppCtx *user); 47 48 /*--------------------------------------------------------------------*/ 49 int main(int argc,char **argv) 50 { 51 Vec x, f; /* solution, function */ 52 Tao tao; /* Tao solver context */ 53 AppCtx user; /* user-defined work context */ 54 55 /* Initialize TAO and PETSc */ 56 PetscFunctionBeginUser; 57 PetscCall(PetscInitialize(&argc,&argv,(char *)0,help)); 58 MPI_Comm_size(MPI_COMM_WORLD,&user.size); 59 MPI_Comm_rank(MPI_COMM_WORLD,&user.rank); 60 PetscCall(InitializeData(&user)); 61 62 /* Run optimization on rank 0 */ 63 if (user.rank == 0) { 64 /* Allocate vectors */ 65 PetscCall(VecCreateSeq(PETSC_COMM_SELF,NPARAMETERS,&x)); 66 PetscCall(VecCreateSeq(PETSC_COMM_SELF,NOBSERVATIONS,&f)); 67 68 /* TAO code begins here */ 69 70 /* Create TAO solver and set desired solution method */ 71 PetscCall(TaoCreate(PETSC_COMM_SELF,&tao)); 72 PetscCall(TaoSetType(tao,TAOPOUNDERS)); 73 74 /* Set the function and Jacobian routines. */ 75 PetscCall(FormStartingPoint(x)); 76 PetscCall(TaoSetSolution(tao,x)); 77 PetscCall(TaoSetResidualRoutine(tao,f,EvaluateFunction,(void*)&user)); 78 79 /* Check for any TAO command line arguments */ 80 PetscCall(TaoSetFromOptions(tao)); 81 82 /* Perform the Solve */ 83 PetscCall(TaoSolve(tao)); 84 85 /* Free TAO data structures */ 86 PetscCall(TaoDestroy(&tao)); 87 88 /* Free PETSc data structures */ 89 PetscCall(VecDestroy(&x)); 90 PetscCall(VecDestroy(&f)); 91 StopWorkers(&user); 92 } else { 93 TaskWorker(&user); 94 } 95 PetscCall(PetscFinalize()); 96 return 0; 97 } 98 99 /*--------------------------------------------------------------------*/ 100 PetscErrorCode EvaluateFunction(Tao tao, Vec X, Vec F, void *ptr) 101 { 102 AppCtx *user = (AppCtx *)ptr; 103 PetscInt i; 104 PetscReal *x,*f; 105 106 PetscFunctionBegin; 107 PetscCall(VecGetArray(X,&x)); 108 PetscCall(VecGetArray(F,&f)); 109 if (user->size == 1) { 110 /* Single processor */ 111 for (i=0;i<NOBSERVATIONS;i++) { 112 PetscCall(RunSimulation(x,i,&f[i],user)); 113 } 114 } else { 115 /* Multiprocessor main */ 116 PetscMPIInt tag; 117 PetscInt finishedtasks,next_task,checkedin; 118 PetscReal f_i=0.0; 119 MPI_Status status; 120 121 next_task=0; 122 finishedtasks=0; 123 checkedin=0; 124 125 while (finishedtasks < NOBSERVATIONS || checkedin < user->size-1) { 126 PetscCallMPI(MPI_Recv(&f_i,1,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&status)); 127 if (status.MPI_TAG == IDLE_TAG) { 128 checkedin++; 129 } else { 130 131 tag = status.MPI_TAG; 132 f[tag] = (PetscReal)f_i; 133 finishedtasks++; 134 } 135 136 if (next_task<NOBSERVATIONS) { 137 PetscCallMPI(MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,next_task,PETSC_COMM_WORLD)); 138 next_task++; 139 140 } else { 141 /* Send idle message */ 142 PetscCallMPI(MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,IDLE_TAG,PETSC_COMM_WORLD)); 143 } 144 } 145 } 146 PetscCall(VecRestoreArray(X,&x)); 147 PetscCall(VecRestoreArray(F,&f)); 148 PetscLogFlops(6*NOBSERVATIONS); 149 PetscFunctionReturn(0); 150 } 151 152 /* ------------------------------------------------------------ */ 153 PetscErrorCode FormStartingPoint(Vec X) 154 { 155 PetscReal *x; 156 157 PetscFunctionBegin; 158 PetscCall(VecGetArray(X,&x)); 159 x[0] = 0.15; 160 x[1] = 0.008; 161 x[2] = 0.010; 162 PetscCall(VecRestoreArray(X,&x)); 163 PetscFunctionReturn(0); 164 } 165 166 /* ---------------------------------------------------------------------- */ 167 PetscErrorCode InitializeData(AppCtx *user) 168 { 169 PetscReal *t=user->t,*y=user->y; 170 PetscInt i=0; 171 172 PetscFunctionBegin; 173 y[i] = 92.9000; t[i++] = 0.5000; 174 y[i] = 78.7000; t[i++] = 0.6250; 175 y[i] = 64.2000; t[i++] = 0.7500; 176 y[i] = 64.9000; t[i++] = 0.8750; 177 y[i] = 57.1000; t[i++] = 1.0000; 178 y[i] = 43.3000; t[i++] = 1.2500; 179 y[i] = 31.1000; t[i++] = 1.7500; 180 y[i] = 23.6000; t[i++] = 2.2500; 181 y[i] = 31.0500; t[i++] = 1.7500; 182 y[i] = 23.7750; t[i++] = 2.2500; 183 y[i] = 17.7375; t[i++] = 2.7500; 184 y[i] = 13.8000; t[i++] = 3.2500; 185 y[i] = 11.5875; t[i++] = 3.7500; 186 y[i] = 9.4125; t[i++] = 4.2500; 187 y[i] = 7.7250; t[i++] = 4.7500; 188 y[i] = 7.3500; t[i++] = 5.2500; 189 y[i] = 8.0250; t[i++] = 5.7500; 190 y[i] = 90.6000; t[i++] = 0.5000; 191 y[i] = 76.9000; t[i++] = 0.6250; 192 y[i] = 71.6000; t[i++] = 0.7500; 193 y[i] = 63.6000; t[i++] = 0.8750; 194 y[i] = 54.0000; t[i++] = 1.0000; 195 y[i] = 39.2000; t[i++] = 1.2500; 196 y[i] = 29.3000; t[i++] = 1.7500; 197 y[i] = 21.4000; t[i++] = 2.2500; 198 y[i] = 29.1750; t[i++] = 1.7500; 199 y[i] = 22.1250; t[i++] = 2.2500; 200 y[i] = 17.5125; t[i++] = 2.7500; 201 y[i] = 14.2500; t[i++] = 3.2500; 202 y[i] = 9.4500; t[i++] = 3.7500; 203 y[i] = 9.1500; t[i++] = 4.2500; 204 y[i] = 7.9125; t[i++] = 4.7500; 205 y[i] = 8.4750; t[i++] = 5.2500; 206 y[i] = 6.1125; t[i++] = 5.7500; 207 y[i] = 80.0000; t[i++] = 0.5000; 208 y[i] = 79.0000; t[i++] = 0.6250; 209 y[i] = 63.8000; t[i++] = 0.7500; 210 y[i] = 57.2000; t[i++] = 0.8750; 211 y[i] = 53.2000; t[i++] = 1.0000; 212 y[i] = 42.5000; t[i++] = 1.2500; 213 y[i] = 26.8000; t[i++] = 1.7500; 214 y[i] = 20.4000; t[i++] = 2.2500; 215 y[i] = 26.8500; t[i++] = 1.7500; 216 y[i] = 21.0000; t[i++] = 2.2500; 217 y[i] = 16.4625; t[i++] = 2.7500; 218 y[i] = 12.5250; t[i++] = 3.2500; 219 y[i] = 10.5375; t[i++] = 3.7500; 220 y[i] = 8.5875; t[i++] = 4.2500; 221 y[i] = 7.1250; t[i++] = 4.7500; 222 y[i] = 6.1125; t[i++] = 5.2500; 223 y[i] = 5.9625; t[i++] = 5.7500; 224 y[i] = 74.1000; t[i++] = 0.5000; 225 y[i] = 67.3000; t[i++] = 0.6250; 226 y[i] = 60.8000; t[i++] = 0.7500; 227 y[i] = 55.5000; t[i++] = 0.8750; 228 y[i] = 50.3000; t[i++] = 1.0000; 229 y[i] = 41.0000; t[i++] = 1.2500; 230 y[i] = 29.4000; t[i++] = 1.7500; 231 y[i] = 20.4000; t[i++] = 2.2500; 232 y[i] = 29.3625; t[i++] = 1.7500; 233 y[i] = 21.1500; t[i++] = 2.2500; 234 y[i] = 16.7625; t[i++] = 2.7500; 235 y[i] = 13.2000; t[i++] = 3.2500; 236 y[i] = 10.8750; t[i++] = 3.7500; 237 y[i] = 8.1750; t[i++] = 4.2500; 238 y[i] = 7.3500; t[i++] = 4.7500; 239 y[i] = 5.9625; t[i++] = 5.2500; 240 y[i] = 5.6250; t[i++] = 5.7500; 241 y[i] = 81.5000; t[i++] = .5000; 242 y[i] = 62.4000; t[i++] = .7500; 243 y[i] = 32.5000; t[i++] = 1.5000; 244 y[i] = 12.4100; t[i++] = 3.0000; 245 y[i] = 13.1200; t[i++] = 3.0000; 246 y[i] = 15.5600; t[i++] = 3.0000; 247 y[i] = 5.6300; t[i++] = 6.0000; 248 y[i] = 78.0000; t[i++] = .5000; 249 y[i] = 59.9000; t[i++] = .7500; 250 y[i] = 33.2000; t[i++] = 1.5000; 251 y[i] = 13.8400; t[i++] = 3.0000; 252 y[i] = 12.7500; t[i++] = 3.0000; 253 y[i] = 14.6200; t[i++] = 3.0000; 254 y[i] = 3.9400; t[i++] = 6.0000; 255 y[i] = 76.8000; t[i++] = .5000; 256 y[i] = 61.0000; t[i++] = .7500; 257 y[i] = 32.9000; t[i++] = 1.5000; 258 y[i] = 13.8700; t[i++] = 3.0000; 259 y[i] = 11.8100; t[i++] = 3.0000; 260 y[i] = 13.3100; t[i++] = 3.0000; 261 y[i] = 5.4400; t[i++] = 6.0000; 262 y[i] = 78.0000; t[i++] = .5000; 263 y[i] = 63.5000; t[i++] = .7500; 264 y[i] = 33.8000; t[i++] = 1.5000; 265 y[i] = 12.5600; t[i++] = 3.0000; 266 y[i] = 5.6300; t[i++] = 6.0000; 267 y[i] = 12.7500; t[i++] = 3.0000; 268 y[i] = 13.1200; t[i++] = 3.0000; 269 y[i] = 5.4400; t[i++] = 6.0000; 270 y[i] = 76.8000; t[i++] = .5000; 271 y[i] = 60.0000; t[i++] = .7500; 272 y[i] = 47.8000; t[i++] = 1.0000; 273 y[i] = 32.0000; t[i++] = 1.5000; 274 y[i] = 22.2000; t[i++] = 2.0000; 275 y[i] = 22.5700; t[i++] = 2.0000; 276 y[i] = 18.8200; t[i++] = 2.5000; 277 y[i] = 13.9500; t[i++] = 3.0000; 278 y[i] = 11.2500; t[i++] = 4.0000; 279 y[i] = 9.0000; t[i++] = 5.0000; 280 y[i] = 6.6700; t[i++] = 6.0000; 281 y[i] = 75.8000; t[i++] = .5000; 282 y[i] = 62.0000; t[i++] = .7500; 283 y[i] = 48.8000; t[i++] = 1.0000; 284 y[i] = 35.2000; t[i++] = 1.5000; 285 y[i] = 20.0000; t[i++] = 2.0000; 286 y[i] = 20.3200; t[i++] = 2.0000; 287 y[i] = 19.3100; t[i++] = 2.5000; 288 y[i] = 12.7500; t[i++] = 3.0000; 289 y[i] = 10.4200; t[i++] = 4.0000; 290 y[i] = 7.3100; t[i++] = 5.0000; 291 y[i] = 7.4200; t[i++] = 6.0000; 292 y[i] = 70.5000; t[i++] = .5000; 293 y[i] = 59.5000; t[i++] = .7500; 294 y[i] = 48.5000; t[i++] = 1.0000; 295 y[i] = 35.8000; t[i++] = 1.5000; 296 y[i] = 21.0000; t[i++] = 2.0000; 297 y[i] = 21.6700; t[i++] = 2.0000; 298 y[i] = 21.0000; t[i++] = 2.5000; 299 y[i] = 15.6400; t[i++] = 3.0000; 300 y[i] = 8.1700; t[i++] = 4.0000; 301 y[i] = 8.5500; t[i++] = 5.0000; 302 y[i] = 10.1200; t[i++] = 6.0000; 303 y[i] = 78.0000; t[i++] = .5000; 304 y[i] = 66.0000; t[i++] = .6250; 305 y[i] = 62.0000; t[i++] = .7500; 306 y[i] = 58.0000; t[i++] = .8750; 307 y[i] = 47.7000; t[i++] = 1.0000; 308 y[i] = 37.8000; t[i++] = 1.2500; 309 y[i] = 20.2000; t[i++] = 2.2500; 310 y[i] = 21.0700; t[i++] = 2.2500; 311 y[i] = 13.8700; t[i++] = 2.7500; 312 y[i] = 9.6700; t[i++] = 3.2500; 313 y[i] = 7.7600; t[i++] = 3.7500; 314 y[i] = 5.4400; t[i++] = 4.2500; 315 y[i] = 4.8700; t[i++] = 4.7500; 316 y[i] = 4.0100; t[i++] = 5.2500; 317 y[i] = 3.7500; t[i++] = 5.7500; 318 y[i] = 24.1900; t[i++] = 3.0000; 319 y[i] = 25.7600; t[i++] = 3.0000; 320 y[i] = 18.0700; t[i++] = 3.0000; 321 y[i] = 11.8100; t[i++] = 3.0000; 322 y[i] = 12.0700; t[i++] = 3.0000; 323 y[i] = 16.1200; t[i++] = 3.0000; 324 y[i] = 70.8000; t[i++] = .5000; 325 y[i] = 54.7000; t[i++] = .7500; 326 y[i] = 48.0000; t[i++] = 1.0000; 327 y[i] = 39.8000; t[i++] = 1.5000; 328 y[i] = 29.8000; t[i++] = 2.0000; 329 y[i] = 23.7000; t[i++] = 2.5000; 330 y[i] = 29.6200; t[i++] = 2.0000; 331 y[i] = 23.8100; t[i++] = 2.5000; 332 y[i] = 17.7000; t[i++] = 3.0000; 333 y[i] = 11.5500; t[i++] = 4.0000; 334 y[i] = 12.0700; t[i++] = 5.0000; 335 y[i] = 8.7400; t[i++] = 6.0000; 336 y[i] = 80.7000; t[i++] = .5000; 337 y[i] = 61.3000; t[i++] = .7500; 338 y[i] = 47.5000; t[i++] = 1.0000; 339 y[i] = 29.0000; t[i++] = 1.5000; 340 y[i] = 24.0000; t[i++] = 2.0000; 341 y[i] = 17.7000; t[i++] = 2.5000; 342 y[i] = 24.5600; t[i++] = 2.0000; 343 y[i] = 18.6700; t[i++] = 2.5000; 344 y[i] = 16.2400; t[i++] = 3.0000; 345 y[i] = 8.7400; t[i++] = 4.0000; 346 y[i] = 7.8700; t[i++] = 5.0000; 347 y[i] = 8.5100; t[i++] = 6.0000; 348 y[i] = 66.7000; t[i++] = .5000; 349 y[i] = 59.2000; t[i++] = .7500; 350 y[i] = 40.8000; t[i++] = 1.0000; 351 y[i] = 30.7000; t[i++] = 1.5000; 352 y[i] = 25.7000; t[i++] = 2.0000; 353 y[i] = 16.3000; t[i++] = 2.5000; 354 y[i] = 25.9900; t[i++] = 2.0000; 355 y[i] = 16.9500; t[i++] = 2.5000; 356 y[i] = 13.3500; t[i++] = 3.0000; 357 y[i] = 8.6200; t[i++] = 4.0000; 358 y[i] = 7.2000; t[i++] = 5.0000; 359 y[i] = 6.6400; t[i++] = 6.0000; 360 y[i] = 13.6900; t[i++] = 3.0000; 361 y[i] = 81.0000; t[i++] = .5000; 362 y[i] = 64.5000; t[i++] = .7500; 363 y[i] = 35.5000; t[i++] = 1.5000; 364 y[i] = 13.3100; t[i++] = 3.0000; 365 y[i] = 4.8700; t[i++] = 6.0000; 366 y[i] = 12.9400; t[i++] = 3.0000; 367 y[i] = 5.0600; t[i++] = 6.0000; 368 y[i] = 15.1900; t[i++] = 3.0000; 369 y[i] = 14.6200; t[i++] = 3.0000; 370 y[i] = 15.6400; t[i++] = 3.0000; 371 y[i] = 25.5000; t[i++] = 1.7500; 372 y[i] = 25.9500; t[i++] = 1.7500; 373 y[i] = 81.7000; t[i++] = .5000; 374 y[i] = 61.6000; t[i++] = .7500; 375 y[i] = 29.8000; t[i++] = 1.7500; 376 y[i] = 29.8100; t[i++] = 1.7500; 377 y[i] = 17.1700; t[i++] = 2.7500; 378 y[i] = 10.3900; t[i++] = 3.7500; 379 y[i] = 28.4000; t[i++] = 1.7500; 380 y[i] = 28.6900; t[i++] = 1.7500; 381 y[i] = 81.3000; t[i++] = .5000; 382 y[i] = 60.9000; t[i++] = .7500; 383 y[i] = 16.6500; t[i++] = 2.7500; 384 y[i] = 10.0500; t[i++] = 3.7500; 385 y[i] = 28.9000; t[i++] = 1.7500; 386 y[i] = 28.9500; t[i++] = 1.7500; 387 PetscFunctionReturn(0); 388 } 389 390 PetscErrorCode TaskWorker(AppCtx *user) 391 { 392 PetscReal x[NPARAMETERS],f = 0.0; 393 PetscMPIInt tag=IDLE_TAG; 394 PetscInt index; 395 MPI_Status status; 396 397 PetscFunctionBegin; 398 /* Send check-in message to rank-0 */ 399 400 PetscCallMPI(MPI_Send(&f,1,MPIU_REAL,0,IDLE_TAG,PETSC_COMM_WORLD)); 401 while (tag != DIE_TAG) { 402 PetscCallMPI(MPI_Recv(x,NPARAMETERS,MPIU_REAL,0,MPI_ANY_TAG,PETSC_COMM_WORLD,&status)); 403 tag = status.MPI_TAG; 404 if (tag == IDLE_TAG) { 405 PetscCallMPI(MPI_Send(&f,1,MPIU_REAL,0,IDLE_TAG,PETSC_COMM_WORLD)); 406 } else if (tag != DIE_TAG) { 407 index = (PetscInt)tag; 408 PetscCall(RunSimulation(x,index,&f,user)); 409 PetscCallMPI(MPI_Send(&f,1,MPIU_REAL,0,tag,PETSC_COMM_WORLD)); 410 } 411 } 412 PetscFunctionReturn(0); 413 } 414 415 PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal*f, AppCtx *user) 416 { 417 PetscReal *t = user->t; 418 PetscReal *y = user->y; 419 #if defined(PETSC_USE_REAL_SINGLE) 420 *f = y[i] - exp(-x[0]*t[i])/(x[1] + x[2]*t[i]); /* expf() for single-precision breaks this example on Freebsd, Valgrind errors on Linux */ 421 #else 422 *f = y[i] - PetscExpScalar(-x[0]*t[i])/(x[1] + x[2]*t[i]); 423 #endif 424 return(0); 425 } 426 427 PetscErrorCode StopWorkers(AppCtx *user) 428 { 429 PetscInt checkedin; 430 MPI_Status status; 431 PetscReal f,x[NPARAMETERS]; 432 433 PetscFunctionBegin; 434 checkedin=0; 435 while (checkedin < user->size-1) { 436 PetscCallMPI(MPI_Recv(&f,1,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&status)); 437 checkedin++; 438 PetscCall(PetscArrayzero(x,NPARAMETERS)); 439 PetscCallMPI(MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,DIE_TAG,PETSC_COMM_WORLD)); 440 } 441 PetscFunctionReturn(0); 442 } 443 444 /*TEST 445 446 build: 447 requires: !complex 448 449 test: 450 nsize: 3 451 requires: !single 452 args: -tao_smonitor -tao_max_it 100 -tao_type pounders -tao_gatol 1.e-5 453 454 TEST*/ 455