1c4762a1bSJed Brown /* 2c4762a1bSJed Brown Include "petsctao.h" so that we can use TAO solvers. Note that this 3c4762a1bSJed Brown file automatically includes libraries such as: 4c4762a1bSJed Brown petsc.h - base PETSc routines petscvec.h - vectors 5a5b23f4aSJose E. Roman petscsys.h - system routines petscmat.h - matrices 6c4762a1bSJed Brown petscis.h - index sets petscksp.h - Krylov subspace methods 7c4762a1bSJed Brown petscviewer.h - viewers petscpc.h - preconditioners 8c4762a1bSJed Brown 9c4762a1bSJed Brown */ 10c4762a1bSJed Brown 11c4762a1bSJed Brown #include <petsctao.h> 12c4762a1bSJed Brown 13c4762a1bSJed Brown /* 14c4762a1bSJed Brown Description: These data are the result of a NIST study involving 15c4762a1bSJed Brown ultrasonic calibration. The response variable is 16c4762a1bSJed Brown ultrasonic response, and the predictor variable is 17c4762a1bSJed Brown metal distance. 18c4762a1bSJed Brown 19c4762a1bSJed Brown Reference: Chwirut, D., NIST (197?). 20c4762a1bSJed Brown Ultrasonic Reference Block Study. 21c4762a1bSJed Brown */ 22c4762a1bSJed Brown 23c4762a1bSJed Brown static char help[] = "Finds the nonlinear least-squares solution to the model \n\ 24c4762a1bSJed Brown y = exp[-b1*x]/(b2+b3*x) + e \n"; 25c4762a1bSJed Brown 26c4762a1bSJed Brown #define NOBSERVATIONS 214 27c4762a1bSJed Brown #define NPARAMETERS 3 28c4762a1bSJed Brown 29c4762a1bSJed Brown #define DIE_TAG 2000 30c4762a1bSJed Brown #define IDLE_TAG 1000 31c4762a1bSJed Brown 32c4762a1bSJed Brown /* User-defined application context */ 33c4762a1bSJed Brown typedef struct { 34c4762a1bSJed Brown /* Working space */ 35c4762a1bSJed Brown PetscReal t[NOBSERVATIONS]; /* array of independent variables of observation */ 36c4762a1bSJed Brown PetscReal y[NOBSERVATIONS]; /* array of dependent variables */ 37c4762a1bSJed Brown PetscMPIInt size, rank; 38c4762a1bSJed Brown } AppCtx; 39c4762a1bSJed Brown 40c4762a1bSJed Brown /* User provided Routines */ 41c4762a1bSJed Brown PetscErrorCode InitializeData(AppCtx *user); 42c4762a1bSJed Brown PetscErrorCode FormStartingPoint(Vec); 43c4762a1bSJed Brown PetscErrorCode EvaluateFunction(Tao, Vec, Vec, void *); 44c4762a1bSJed Brown PetscErrorCode TaskWorker(AppCtx *user); 45c4762a1bSJed Brown PetscErrorCode StopWorkers(AppCtx *user); 46c4762a1bSJed Brown PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal *f, AppCtx *user); 47c4762a1bSJed Brown 48c4762a1bSJed Brown /*--------------------------------------------------------------------*/ 49*9371c9d4SSatish Balay int main(int argc, char **argv) { 50c4762a1bSJed Brown Vec x, f; /* solution, function */ 51c4762a1bSJed Brown Tao tao; /* Tao solver context */ 52c4762a1bSJed Brown AppCtx user; /* user-defined work context */ 53c4762a1bSJed Brown 54c4762a1bSJed Brown /* Initialize TAO and PETSc */ 55327415f7SBarry Smith PetscFunctionBeginUser; 569566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 57c4762a1bSJed Brown MPI_Comm_size(MPI_COMM_WORLD, &user.size); 58c4762a1bSJed Brown MPI_Comm_rank(MPI_COMM_WORLD, &user.rank); 599566063dSJacob Faibussowitsch PetscCall(InitializeData(&user)); 60c4762a1bSJed Brown 61c4762a1bSJed Brown /* Run optimization on rank 0 */ 62c4762a1bSJed Brown if (user.rank == 0) { 63c4762a1bSJed Brown /* Allocate vectors */ 649566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, NPARAMETERS, &x)); 659566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, NOBSERVATIONS, &f)); 66c4762a1bSJed Brown 67c4762a1bSJed Brown /* TAO code begins here */ 68c4762a1bSJed Brown 69c4762a1bSJed Brown /* Create TAO solver and set desired solution method */ 709566063dSJacob Faibussowitsch PetscCall(TaoCreate(PETSC_COMM_SELF, &tao)); 719566063dSJacob Faibussowitsch PetscCall(TaoSetType(tao, TAOPOUNDERS)); 72c4762a1bSJed Brown 73c4762a1bSJed Brown /* Set the function and Jacobian routines. */ 749566063dSJacob Faibussowitsch PetscCall(FormStartingPoint(x)); 759566063dSJacob Faibussowitsch PetscCall(TaoSetSolution(tao, x)); 769566063dSJacob Faibussowitsch PetscCall(TaoSetResidualRoutine(tao, f, EvaluateFunction, (void *)&user)); 77c4762a1bSJed Brown 78c4762a1bSJed Brown /* Check for any TAO command line arguments */ 799566063dSJacob Faibussowitsch PetscCall(TaoSetFromOptions(tao)); 80c4762a1bSJed Brown 81c4762a1bSJed Brown /* Perform the Solve */ 829566063dSJacob Faibussowitsch PetscCall(TaoSolve(tao)); 83c4762a1bSJed Brown 84c4762a1bSJed Brown /* Free TAO data structures */ 859566063dSJacob Faibussowitsch PetscCall(TaoDestroy(&tao)); 86c4762a1bSJed Brown 87c4762a1bSJed Brown /* Free PETSc data structures */ 889566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 899566063dSJacob Faibussowitsch PetscCall(VecDestroy(&f)); 90c4762a1bSJed Brown StopWorkers(&user); 91c4762a1bSJed Brown } else { 92c4762a1bSJed Brown TaskWorker(&user); 93c4762a1bSJed Brown } 949566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 95b122ec5aSJacob Faibussowitsch return 0; 96c4762a1bSJed Brown } 97c4762a1bSJed Brown 98c4762a1bSJed Brown /*--------------------------------------------------------------------*/ 99*9371c9d4SSatish Balay PetscErrorCode EvaluateFunction(Tao tao, Vec X, Vec F, void *ptr) { 100c4762a1bSJed Brown AppCtx *user = (AppCtx *)ptr; 101c4762a1bSJed Brown PetscInt i; 102c4762a1bSJed Brown PetscReal *x, *f; 103c4762a1bSJed Brown 104c4762a1bSJed Brown PetscFunctionBegin; 1059566063dSJacob Faibussowitsch PetscCall(VecGetArray(X, &x)); 1069566063dSJacob Faibussowitsch PetscCall(VecGetArray(F, &f)); 107c4762a1bSJed Brown if (user->size == 1) { 108c4762a1bSJed Brown /* Single processor */ 109*9371c9d4SSatish Balay for (i = 0; i < NOBSERVATIONS; i++) { PetscCall(RunSimulation(x, i, &f[i], user)); } 110c4762a1bSJed Brown } else { 1119dddd249SSatish Balay /* Multiprocessor main */ 112c4762a1bSJed Brown PetscMPIInt tag; 113c4762a1bSJed Brown PetscInt finishedtasks, next_task, checkedin; 114c4762a1bSJed Brown PetscReal f_i = 0.0; 115c4762a1bSJed Brown MPI_Status status; 116c4762a1bSJed Brown 117c4762a1bSJed Brown next_task = 0; 118c4762a1bSJed Brown finishedtasks = 0; 119c4762a1bSJed Brown checkedin = 0; 120c4762a1bSJed Brown 121c4762a1bSJed Brown while (finishedtasks < NOBSERVATIONS || checkedin < user->size - 1) { 1229566063dSJacob Faibussowitsch PetscCallMPI(MPI_Recv(&f_i, 1, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PETSC_COMM_WORLD, &status)); 123c4762a1bSJed Brown if (status.MPI_TAG == IDLE_TAG) { 124c4762a1bSJed Brown checkedin++; 125c4762a1bSJed Brown } else { 126c4762a1bSJed Brown tag = status.MPI_TAG; 127c4762a1bSJed Brown f[tag] = (PetscReal)f_i; 128c4762a1bSJed Brown finishedtasks++; 129c4762a1bSJed Brown } 130c4762a1bSJed Brown 131c4762a1bSJed Brown if (next_task < NOBSERVATIONS) { 1329566063dSJacob Faibussowitsch PetscCallMPI(MPI_Send(x, NPARAMETERS, MPIU_REAL, status.MPI_SOURCE, next_task, PETSC_COMM_WORLD)); 133c4762a1bSJed Brown next_task++; 134c4762a1bSJed Brown 135c4762a1bSJed Brown } else { 136c4762a1bSJed Brown /* Send idle message */ 1379566063dSJacob Faibussowitsch PetscCallMPI(MPI_Send(x, NPARAMETERS, MPIU_REAL, status.MPI_SOURCE, IDLE_TAG, PETSC_COMM_WORLD)); 138c4762a1bSJed Brown } 139c4762a1bSJed Brown } 140c4762a1bSJed Brown } 1419566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(X, &x)); 1429566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(F, &f)); 143c4762a1bSJed Brown PetscLogFlops(6 * NOBSERVATIONS); 144c4762a1bSJed Brown PetscFunctionReturn(0); 145c4762a1bSJed Brown } 146c4762a1bSJed Brown 147c4762a1bSJed Brown /* ------------------------------------------------------------ */ 148*9371c9d4SSatish Balay PetscErrorCode FormStartingPoint(Vec X) { 149c4762a1bSJed Brown PetscReal *x; 150c4762a1bSJed Brown 151c4762a1bSJed Brown PetscFunctionBegin; 1529566063dSJacob Faibussowitsch PetscCall(VecGetArray(X, &x)); 153c4762a1bSJed Brown x[0] = 0.15; 154c4762a1bSJed Brown x[1] = 0.008; 155c4762a1bSJed Brown x[2] = 0.010; 1569566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(X, &x)); 157c4762a1bSJed Brown PetscFunctionReturn(0); 158c4762a1bSJed Brown } 159c4762a1bSJed Brown 160c4762a1bSJed Brown /* ---------------------------------------------------------------------- */ 161*9371c9d4SSatish Balay PetscErrorCode InitializeData(AppCtx *user) { 162c4762a1bSJed Brown PetscReal *t = user->t, *y = user->y; 163c4762a1bSJed Brown PetscInt i = 0; 164c4762a1bSJed Brown 165c4762a1bSJed Brown PetscFunctionBegin; 166*9371c9d4SSatish Balay y[i] = 92.9000; 167*9371c9d4SSatish Balay t[i++] = 0.5000; 168*9371c9d4SSatish Balay y[i] = 78.7000; 169*9371c9d4SSatish Balay t[i++] = 0.6250; 170*9371c9d4SSatish Balay y[i] = 64.2000; 171*9371c9d4SSatish Balay t[i++] = 0.7500; 172*9371c9d4SSatish Balay y[i] = 64.9000; 173*9371c9d4SSatish Balay t[i++] = 0.8750; 174*9371c9d4SSatish Balay y[i] = 57.1000; 175*9371c9d4SSatish Balay t[i++] = 1.0000; 176*9371c9d4SSatish Balay y[i] = 43.3000; 177*9371c9d4SSatish Balay t[i++] = 1.2500; 178*9371c9d4SSatish Balay y[i] = 31.1000; 179*9371c9d4SSatish Balay t[i++] = 1.7500; 180*9371c9d4SSatish Balay y[i] = 23.6000; 181*9371c9d4SSatish Balay t[i++] = 2.2500; 182*9371c9d4SSatish Balay y[i] = 31.0500; 183*9371c9d4SSatish Balay t[i++] = 1.7500; 184*9371c9d4SSatish Balay y[i] = 23.7750; 185*9371c9d4SSatish Balay t[i++] = 2.2500; 186*9371c9d4SSatish Balay y[i] = 17.7375; 187*9371c9d4SSatish Balay t[i++] = 2.7500; 188*9371c9d4SSatish Balay y[i] = 13.8000; 189*9371c9d4SSatish Balay t[i++] = 3.2500; 190*9371c9d4SSatish Balay y[i] = 11.5875; 191*9371c9d4SSatish Balay t[i++] = 3.7500; 192*9371c9d4SSatish Balay y[i] = 9.4125; 193*9371c9d4SSatish Balay t[i++] = 4.2500; 194*9371c9d4SSatish Balay y[i] = 7.7250; 195*9371c9d4SSatish Balay t[i++] = 4.7500; 196*9371c9d4SSatish Balay y[i] = 7.3500; 197*9371c9d4SSatish Balay t[i++] = 5.2500; 198*9371c9d4SSatish Balay y[i] = 8.0250; 199*9371c9d4SSatish Balay t[i++] = 5.7500; 200*9371c9d4SSatish Balay y[i] = 90.6000; 201*9371c9d4SSatish Balay t[i++] = 0.5000; 202*9371c9d4SSatish Balay y[i] = 76.9000; 203*9371c9d4SSatish Balay t[i++] = 0.6250; 204*9371c9d4SSatish Balay y[i] = 71.6000; 205*9371c9d4SSatish Balay t[i++] = 0.7500; 206*9371c9d4SSatish Balay y[i] = 63.6000; 207*9371c9d4SSatish Balay t[i++] = 0.8750; 208*9371c9d4SSatish Balay y[i] = 54.0000; 209*9371c9d4SSatish Balay t[i++] = 1.0000; 210*9371c9d4SSatish Balay y[i] = 39.2000; 211*9371c9d4SSatish Balay t[i++] = 1.2500; 212*9371c9d4SSatish Balay y[i] = 29.3000; 213*9371c9d4SSatish Balay t[i++] = 1.7500; 214*9371c9d4SSatish Balay y[i] = 21.4000; 215*9371c9d4SSatish Balay t[i++] = 2.2500; 216*9371c9d4SSatish Balay y[i] = 29.1750; 217*9371c9d4SSatish Balay t[i++] = 1.7500; 218*9371c9d4SSatish Balay y[i] = 22.1250; 219*9371c9d4SSatish Balay t[i++] = 2.2500; 220*9371c9d4SSatish Balay y[i] = 17.5125; 221*9371c9d4SSatish Balay t[i++] = 2.7500; 222*9371c9d4SSatish Balay y[i] = 14.2500; 223*9371c9d4SSatish Balay t[i++] = 3.2500; 224*9371c9d4SSatish Balay y[i] = 9.4500; 225*9371c9d4SSatish Balay t[i++] = 3.7500; 226*9371c9d4SSatish Balay y[i] = 9.1500; 227*9371c9d4SSatish Balay t[i++] = 4.2500; 228*9371c9d4SSatish Balay y[i] = 7.9125; 229*9371c9d4SSatish Balay t[i++] = 4.7500; 230*9371c9d4SSatish Balay y[i] = 8.4750; 231*9371c9d4SSatish Balay t[i++] = 5.2500; 232*9371c9d4SSatish Balay y[i] = 6.1125; 233*9371c9d4SSatish Balay t[i++] = 5.7500; 234*9371c9d4SSatish Balay y[i] = 80.0000; 235*9371c9d4SSatish Balay t[i++] = 0.5000; 236*9371c9d4SSatish Balay y[i] = 79.0000; 237*9371c9d4SSatish Balay t[i++] = 0.6250; 238*9371c9d4SSatish Balay y[i] = 63.8000; 239*9371c9d4SSatish Balay t[i++] = 0.7500; 240*9371c9d4SSatish Balay y[i] = 57.2000; 241*9371c9d4SSatish Balay t[i++] = 0.8750; 242*9371c9d4SSatish Balay y[i] = 53.2000; 243*9371c9d4SSatish Balay t[i++] = 1.0000; 244*9371c9d4SSatish Balay y[i] = 42.5000; 245*9371c9d4SSatish Balay t[i++] = 1.2500; 246*9371c9d4SSatish Balay y[i] = 26.8000; 247*9371c9d4SSatish Balay t[i++] = 1.7500; 248*9371c9d4SSatish Balay y[i] = 20.4000; 249*9371c9d4SSatish Balay t[i++] = 2.2500; 250*9371c9d4SSatish Balay y[i] = 26.8500; 251*9371c9d4SSatish Balay t[i++] = 1.7500; 252*9371c9d4SSatish Balay y[i] = 21.0000; 253*9371c9d4SSatish Balay t[i++] = 2.2500; 254*9371c9d4SSatish Balay y[i] = 16.4625; 255*9371c9d4SSatish Balay t[i++] = 2.7500; 256*9371c9d4SSatish Balay y[i] = 12.5250; 257*9371c9d4SSatish Balay t[i++] = 3.2500; 258*9371c9d4SSatish Balay y[i] = 10.5375; 259*9371c9d4SSatish Balay t[i++] = 3.7500; 260*9371c9d4SSatish Balay y[i] = 8.5875; 261*9371c9d4SSatish Balay t[i++] = 4.2500; 262*9371c9d4SSatish Balay y[i] = 7.1250; 263*9371c9d4SSatish Balay t[i++] = 4.7500; 264*9371c9d4SSatish Balay y[i] = 6.1125; 265*9371c9d4SSatish Balay t[i++] = 5.2500; 266*9371c9d4SSatish Balay y[i] = 5.9625; 267*9371c9d4SSatish Balay t[i++] = 5.7500; 268*9371c9d4SSatish Balay y[i] = 74.1000; 269*9371c9d4SSatish Balay t[i++] = 0.5000; 270*9371c9d4SSatish Balay y[i] = 67.3000; 271*9371c9d4SSatish Balay t[i++] = 0.6250; 272*9371c9d4SSatish Balay y[i] = 60.8000; 273*9371c9d4SSatish Balay t[i++] = 0.7500; 274*9371c9d4SSatish Balay y[i] = 55.5000; 275*9371c9d4SSatish Balay t[i++] = 0.8750; 276*9371c9d4SSatish Balay y[i] = 50.3000; 277*9371c9d4SSatish Balay t[i++] = 1.0000; 278*9371c9d4SSatish Balay y[i] = 41.0000; 279*9371c9d4SSatish Balay t[i++] = 1.2500; 280*9371c9d4SSatish Balay y[i] = 29.4000; 281*9371c9d4SSatish Balay t[i++] = 1.7500; 282*9371c9d4SSatish Balay y[i] = 20.4000; 283*9371c9d4SSatish Balay t[i++] = 2.2500; 284*9371c9d4SSatish Balay y[i] = 29.3625; 285*9371c9d4SSatish Balay t[i++] = 1.7500; 286*9371c9d4SSatish Balay y[i] = 21.1500; 287*9371c9d4SSatish Balay t[i++] = 2.2500; 288*9371c9d4SSatish Balay y[i] = 16.7625; 289*9371c9d4SSatish Balay t[i++] = 2.7500; 290*9371c9d4SSatish Balay y[i] = 13.2000; 291*9371c9d4SSatish Balay t[i++] = 3.2500; 292*9371c9d4SSatish Balay y[i] = 10.8750; 293*9371c9d4SSatish Balay t[i++] = 3.7500; 294*9371c9d4SSatish Balay y[i] = 8.1750; 295*9371c9d4SSatish Balay t[i++] = 4.2500; 296*9371c9d4SSatish Balay y[i] = 7.3500; 297*9371c9d4SSatish Balay t[i++] = 4.7500; 298*9371c9d4SSatish Balay y[i] = 5.9625; 299*9371c9d4SSatish Balay t[i++] = 5.2500; 300*9371c9d4SSatish Balay y[i] = 5.6250; 301*9371c9d4SSatish Balay t[i++] = 5.7500; 302*9371c9d4SSatish Balay y[i] = 81.5000; 303*9371c9d4SSatish Balay t[i++] = .5000; 304*9371c9d4SSatish Balay y[i] = 62.4000; 305*9371c9d4SSatish Balay t[i++] = .7500; 306*9371c9d4SSatish Balay y[i] = 32.5000; 307*9371c9d4SSatish Balay t[i++] = 1.5000; 308*9371c9d4SSatish Balay y[i] = 12.4100; 309*9371c9d4SSatish Balay t[i++] = 3.0000; 310*9371c9d4SSatish Balay y[i] = 13.1200; 311*9371c9d4SSatish Balay t[i++] = 3.0000; 312*9371c9d4SSatish Balay y[i] = 15.5600; 313*9371c9d4SSatish Balay t[i++] = 3.0000; 314*9371c9d4SSatish Balay y[i] = 5.6300; 315*9371c9d4SSatish Balay t[i++] = 6.0000; 316*9371c9d4SSatish Balay y[i] = 78.0000; 317*9371c9d4SSatish Balay t[i++] = .5000; 318*9371c9d4SSatish Balay y[i] = 59.9000; 319*9371c9d4SSatish Balay t[i++] = .7500; 320*9371c9d4SSatish Balay y[i] = 33.2000; 321*9371c9d4SSatish Balay t[i++] = 1.5000; 322*9371c9d4SSatish Balay y[i] = 13.8400; 323*9371c9d4SSatish Balay t[i++] = 3.0000; 324*9371c9d4SSatish Balay y[i] = 12.7500; 325*9371c9d4SSatish Balay t[i++] = 3.0000; 326*9371c9d4SSatish Balay y[i] = 14.6200; 327*9371c9d4SSatish Balay t[i++] = 3.0000; 328*9371c9d4SSatish Balay y[i] = 3.9400; 329*9371c9d4SSatish Balay t[i++] = 6.0000; 330*9371c9d4SSatish Balay y[i] = 76.8000; 331*9371c9d4SSatish Balay t[i++] = .5000; 332*9371c9d4SSatish Balay y[i] = 61.0000; 333*9371c9d4SSatish Balay t[i++] = .7500; 334*9371c9d4SSatish Balay y[i] = 32.9000; 335*9371c9d4SSatish Balay t[i++] = 1.5000; 336*9371c9d4SSatish Balay y[i] = 13.8700; 337*9371c9d4SSatish Balay t[i++] = 3.0000; 338*9371c9d4SSatish Balay y[i] = 11.8100; 339*9371c9d4SSatish Balay t[i++] = 3.0000; 340*9371c9d4SSatish Balay y[i] = 13.3100; 341*9371c9d4SSatish Balay t[i++] = 3.0000; 342*9371c9d4SSatish Balay y[i] = 5.4400; 343*9371c9d4SSatish Balay t[i++] = 6.0000; 344*9371c9d4SSatish Balay y[i] = 78.0000; 345*9371c9d4SSatish Balay t[i++] = .5000; 346*9371c9d4SSatish Balay y[i] = 63.5000; 347*9371c9d4SSatish Balay t[i++] = .7500; 348*9371c9d4SSatish Balay y[i] = 33.8000; 349*9371c9d4SSatish Balay t[i++] = 1.5000; 350*9371c9d4SSatish Balay y[i] = 12.5600; 351*9371c9d4SSatish Balay t[i++] = 3.0000; 352*9371c9d4SSatish Balay y[i] = 5.6300; 353*9371c9d4SSatish Balay t[i++] = 6.0000; 354*9371c9d4SSatish Balay y[i] = 12.7500; 355*9371c9d4SSatish Balay t[i++] = 3.0000; 356*9371c9d4SSatish Balay y[i] = 13.1200; 357*9371c9d4SSatish Balay t[i++] = 3.0000; 358*9371c9d4SSatish Balay y[i] = 5.4400; 359*9371c9d4SSatish Balay t[i++] = 6.0000; 360*9371c9d4SSatish Balay y[i] = 76.8000; 361*9371c9d4SSatish Balay t[i++] = .5000; 362*9371c9d4SSatish Balay y[i] = 60.0000; 363*9371c9d4SSatish Balay t[i++] = .7500; 364*9371c9d4SSatish Balay y[i] = 47.8000; 365*9371c9d4SSatish Balay t[i++] = 1.0000; 366*9371c9d4SSatish Balay y[i] = 32.0000; 367*9371c9d4SSatish Balay t[i++] = 1.5000; 368*9371c9d4SSatish Balay y[i] = 22.2000; 369*9371c9d4SSatish Balay t[i++] = 2.0000; 370*9371c9d4SSatish Balay y[i] = 22.5700; 371*9371c9d4SSatish Balay t[i++] = 2.0000; 372*9371c9d4SSatish Balay y[i] = 18.8200; 373*9371c9d4SSatish Balay t[i++] = 2.5000; 374*9371c9d4SSatish Balay y[i] = 13.9500; 375*9371c9d4SSatish Balay t[i++] = 3.0000; 376*9371c9d4SSatish Balay y[i] = 11.2500; 377*9371c9d4SSatish Balay t[i++] = 4.0000; 378*9371c9d4SSatish Balay y[i] = 9.0000; 379*9371c9d4SSatish Balay t[i++] = 5.0000; 380*9371c9d4SSatish Balay y[i] = 6.6700; 381*9371c9d4SSatish Balay t[i++] = 6.0000; 382*9371c9d4SSatish Balay y[i] = 75.8000; 383*9371c9d4SSatish Balay t[i++] = .5000; 384*9371c9d4SSatish Balay y[i] = 62.0000; 385*9371c9d4SSatish Balay t[i++] = .7500; 386*9371c9d4SSatish Balay y[i] = 48.8000; 387*9371c9d4SSatish Balay t[i++] = 1.0000; 388*9371c9d4SSatish Balay y[i] = 35.2000; 389*9371c9d4SSatish Balay t[i++] = 1.5000; 390*9371c9d4SSatish Balay y[i] = 20.0000; 391*9371c9d4SSatish Balay t[i++] = 2.0000; 392*9371c9d4SSatish Balay y[i] = 20.3200; 393*9371c9d4SSatish Balay t[i++] = 2.0000; 394*9371c9d4SSatish Balay y[i] = 19.3100; 395*9371c9d4SSatish Balay t[i++] = 2.5000; 396*9371c9d4SSatish Balay y[i] = 12.7500; 397*9371c9d4SSatish Balay t[i++] = 3.0000; 398*9371c9d4SSatish Balay y[i] = 10.4200; 399*9371c9d4SSatish Balay t[i++] = 4.0000; 400*9371c9d4SSatish Balay y[i] = 7.3100; 401*9371c9d4SSatish Balay t[i++] = 5.0000; 402*9371c9d4SSatish Balay y[i] = 7.4200; 403*9371c9d4SSatish Balay t[i++] = 6.0000; 404*9371c9d4SSatish Balay y[i] = 70.5000; 405*9371c9d4SSatish Balay t[i++] = .5000; 406*9371c9d4SSatish Balay y[i] = 59.5000; 407*9371c9d4SSatish Balay t[i++] = .7500; 408*9371c9d4SSatish Balay y[i] = 48.5000; 409*9371c9d4SSatish Balay t[i++] = 1.0000; 410*9371c9d4SSatish Balay y[i] = 35.8000; 411*9371c9d4SSatish Balay t[i++] = 1.5000; 412*9371c9d4SSatish Balay y[i] = 21.0000; 413*9371c9d4SSatish Balay t[i++] = 2.0000; 414*9371c9d4SSatish Balay y[i] = 21.6700; 415*9371c9d4SSatish Balay t[i++] = 2.0000; 416*9371c9d4SSatish Balay y[i] = 21.0000; 417*9371c9d4SSatish Balay t[i++] = 2.5000; 418*9371c9d4SSatish Balay y[i] = 15.6400; 419*9371c9d4SSatish Balay t[i++] = 3.0000; 420*9371c9d4SSatish Balay y[i] = 8.1700; 421*9371c9d4SSatish Balay t[i++] = 4.0000; 422*9371c9d4SSatish Balay y[i] = 8.5500; 423*9371c9d4SSatish Balay t[i++] = 5.0000; 424*9371c9d4SSatish Balay y[i] = 10.1200; 425*9371c9d4SSatish Balay t[i++] = 6.0000; 426*9371c9d4SSatish Balay y[i] = 78.0000; 427*9371c9d4SSatish Balay t[i++] = .5000; 428*9371c9d4SSatish Balay y[i] = 66.0000; 429*9371c9d4SSatish Balay t[i++] = .6250; 430*9371c9d4SSatish Balay y[i] = 62.0000; 431*9371c9d4SSatish Balay t[i++] = .7500; 432*9371c9d4SSatish Balay y[i] = 58.0000; 433*9371c9d4SSatish Balay t[i++] = .8750; 434*9371c9d4SSatish Balay y[i] = 47.7000; 435*9371c9d4SSatish Balay t[i++] = 1.0000; 436*9371c9d4SSatish Balay y[i] = 37.8000; 437*9371c9d4SSatish Balay t[i++] = 1.2500; 438*9371c9d4SSatish Balay y[i] = 20.2000; 439*9371c9d4SSatish Balay t[i++] = 2.2500; 440*9371c9d4SSatish Balay y[i] = 21.0700; 441*9371c9d4SSatish Balay t[i++] = 2.2500; 442*9371c9d4SSatish Balay y[i] = 13.8700; 443*9371c9d4SSatish Balay t[i++] = 2.7500; 444*9371c9d4SSatish Balay y[i] = 9.6700; 445*9371c9d4SSatish Balay t[i++] = 3.2500; 446*9371c9d4SSatish Balay y[i] = 7.7600; 447*9371c9d4SSatish Balay t[i++] = 3.7500; 448*9371c9d4SSatish Balay y[i] = 5.4400; 449*9371c9d4SSatish Balay t[i++] = 4.2500; 450*9371c9d4SSatish Balay y[i] = 4.8700; 451*9371c9d4SSatish Balay t[i++] = 4.7500; 452*9371c9d4SSatish Balay y[i] = 4.0100; 453*9371c9d4SSatish Balay t[i++] = 5.2500; 454*9371c9d4SSatish Balay y[i] = 3.7500; 455*9371c9d4SSatish Balay t[i++] = 5.7500; 456*9371c9d4SSatish Balay y[i] = 24.1900; 457*9371c9d4SSatish Balay t[i++] = 3.0000; 458*9371c9d4SSatish Balay y[i] = 25.7600; 459*9371c9d4SSatish Balay t[i++] = 3.0000; 460*9371c9d4SSatish Balay y[i] = 18.0700; 461*9371c9d4SSatish Balay t[i++] = 3.0000; 462*9371c9d4SSatish Balay y[i] = 11.8100; 463*9371c9d4SSatish Balay t[i++] = 3.0000; 464*9371c9d4SSatish Balay y[i] = 12.0700; 465*9371c9d4SSatish Balay t[i++] = 3.0000; 466*9371c9d4SSatish Balay y[i] = 16.1200; 467*9371c9d4SSatish Balay t[i++] = 3.0000; 468*9371c9d4SSatish Balay y[i] = 70.8000; 469*9371c9d4SSatish Balay t[i++] = .5000; 470*9371c9d4SSatish Balay y[i] = 54.7000; 471*9371c9d4SSatish Balay t[i++] = .7500; 472*9371c9d4SSatish Balay y[i] = 48.0000; 473*9371c9d4SSatish Balay t[i++] = 1.0000; 474*9371c9d4SSatish Balay y[i] = 39.8000; 475*9371c9d4SSatish Balay t[i++] = 1.5000; 476*9371c9d4SSatish Balay y[i] = 29.8000; 477*9371c9d4SSatish Balay t[i++] = 2.0000; 478*9371c9d4SSatish Balay y[i] = 23.7000; 479*9371c9d4SSatish Balay t[i++] = 2.5000; 480*9371c9d4SSatish Balay y[i] = 29.6200; 481*9371c9d4SSatish Balay t[i++] = 2.0000; 482*9371c9d4SSatish Balay y[i] = 23.8100; 483*9371c9d4SSatish Balay t[i++] = 2.5000; 484*9371c9d4SSatish Balay y[i] = 17.7000; 485*9371c9d4SSatish Balay t[i++] = 3.0000; 486*9371c9d4SSatish Balay y[i] = 11.5500; 487*9371c9d4SSatish Balay t[i++] = 4.0000; 488*9371c9d4SSatish Balay y[i] = 12.0700; 489*9371c9d4SSatish Balay t[i++] = 5.0000; 490*9371c9d4SSatish Balay y[i] = 8.7400; 491*9371c9d4SSatish Balay t[i++] = 6.0000; 492*9371c9d4SSatish Balay y[i] = 80.7000; 493*9371c9d4SSatish Balay t[i++] = .5000; 494*9371c9d4SSatish Balay y[i] = 61.3000; 495*9371c9d4SSatish Balay t[i++] = .7500; 496*9371c9d4SSatish Balay y[i] = 47.5000; 497*9371c9d4SSatish Balay t[i++] = 1.0000; 498*9371c9d4SSatish Balay y[i] = 29.0000; 499*9371c9d4SSatish Balay t[i++] = 1.5000; 500*9371c9d4SSatish Balay y[i] = 24.0000; 501*9371c9d4SSatish Balay t[i++] = 2.0000; 502*9371c9d4SSatish Balay y[i] = 17.7000; 503*9371c9d4SSatish Balay t[i++] = 2.5000; 504*9371c9d4SSatish Balay y[i] = 24.5600; 505*9371c9d4SSatish Balay t[i++] = 2.0000; 506*9371c9d4SSatish Balay y[i] = 18.6700; 507*9371c9d4SSatish Balay t[i++] = 2.5000; 508*9371c9d4SSatish Balay y[i] = 16.2400; 509*9371c9d4SSatish Balay t[i++] = 3.0000; 510*9371c9d4SSatish Balay y[i] = 8.7400; 511*9371c9d4SSatish Balay t[i++] = 4.0000; 512*9371c9d4SSatish Balay y[i] = 7.8700; 513*9371c9d4SSatish Balay t[i++] = 5.0000; 514*9371c9d4SSatish Balay y[i] = 8.5100; 515*9371c9d4SSatish Balay t[i++] = 6.0000; 516*9371c9d4SSatish Balay y[i] = 66.7000; 517*9371c9d4SSatish Balay t[i++] = .5000; 518*9371c9d4SSatish Balay y[i] = 59.2000; 519*9371c9d4SSatish Balay t[i++] = .7500; 520*9371c9d4SSatish Balay y[i] = 40.8000; 521*9371c9d4SSatish Balay t[i++] = 1.0000; 522*9371c9d4SSatish Balay y[i] = 30.7000; 523*9371c9d4SSatish Balay t[i++] = 1.5000; 524*9371c9d4SSatish Balay y[i] = 25.7000; 525*9371c9d4SSatish Balay t[i++] = 2.0000; 526*9371c9d4SSatish Balay y[i] = 16.3000; 527*9371c9d4SSatish Balay t[i++] = 2.5000; 528*9371c9d4SSatish Balay y[i] = 25.9900; 529*9371c9d4SSatish Balay t[i++] = 2.0000; 530*9371c9d4SSatish Balay y[i] = 16.9500; 531*9371c9d4SSatish Balay t[i++] = 2.5000; 532*9371c9d4SSatish Balay y[i] = 13.3500; 533*9371c9d4SSatish Balay t[i++] = 3.0000; 534*9371c9d4SSatish Balay y[i] = 8.6200; 535*9371c9d4SSatish Balay t[i++] = 4.0000; 536*9371c9d4SSatish Balay y[i] = 7.2000; 537*9371c9d4SSatish Balay t[i++] = 5.0000; 538*9371c9d4SSatish Balay y[i] = 6.6400; 539*9371c9d4SSatish Balay t[i++] = 6.0000; 540*9371c9d4SSatish Balay y[i] = 13.6900; 541*9371c9d4SSatish Balay t[i++] = 3.0000; 542*9371c9d4SSatish Balay y[i] = 81.0000; 543*9371c9d4SSatish Balay t[i++] = .5000; 544*9371c9d4SSatish Balay y[i] = 64.5000; 545*9371c9d4SSatish Balay t[i++] = .7500; 546*9371c9d4SSatish Balay y[i] = 35.5000; 547*9371c9d4SSatish Balay t[i++] = 1.5000; 548*9371c9d4SSatish Balay y[i] = 13.3100; 549*9371c9d4SSatish Balay t[i++] = 3.0000; 550*9371c9d4SSatish Balay y[i] = 4.8700; 551*9371c9d4SSatish Balay t[i++] = 6.0000; 552*9371c9d4SSatish Balay y[i] = 12.9400; 553*9371c9d4SSatish Balay t[i++] = 3.0000; 554*9371c9d4SSatish Balay y[i] = 5.0600; 555*9371c9d4SSatish Balay t[i++] = 6.0000; 556*9371c9d4SSatish Balay y[i] = 15.1900; 557*9371c9d4SSatish Balay t[i++] = 3.0000; 558*9371c9d4SSatish Balay y[i] = 14.6200; 559*9371c9d4SSatish Balay t[i++] = 3.0000; 560*9371c9d4SSatish Balay y[i] = 15.6400; 561*9371c9d4SSatish Balay t[i++] = 3.0000; 562*9371c9d4SSatish Balay y[i] = 25.5000; 563*9371c9d4SSatish Balay t[i++] = 1.7500; 564*9371c9d4SSatish Balay y[i] = 25.9500; 565*9371c9d4SSatish Balay t[i++] = 1.7500; 566*9371c9d4SSatish Balay y[i] = 81.7000; 567*9371c9d4SSatish Balay t[i++] = .5000; 568*9371c9d4SSatish Balay y[i] = 61.6000; 569*9371c9d4SSatish Balay t[i++] = .7500; 570*9371c9d4SSatish Balay y[i] = 29.8000; 571*9371c9d4SSatish Balay t[i++] = 1.7500; 572*9371c9d4SSatish Balay y[i] = 29.8100; 573*9371c9d4SSatish Balay t[i++] = 1.7500; 574*9371c9d4SSatish Balay y[i] = 17.1700; 575*9371c9d4SSatish Balay t[i++] = 2.7500; 576*9371c9d4SSatish Balay y[i] = 10.3900; 577*9371c9d4SSatish Balay t[i++] = 3.7500; 578*9371c9d4SSatish Balay y[i] = 28.4000; 579*9371c9d4SSatish Balay t[i++] = 1.7500; 580*9371c9d4SSatish Balay y[i] = 28.6900; 581*9371c9d4SSatish Balay t[i++] = 1.7500; 582*9371c9d4SSatish Balay y[i] = 81.3000; 583*9371c9d4SSatish Balay t[i++] = .5000; 584*9371c9d4SSatish Balay y[i] = 60.9000; 585*9371c9d4SSatish Balay t[i++] = .7500; 586*9371c9d4SSatish Balay y[i] = 16.6500; 587*9371c9d4SSatish Balay t[i++] = 2.7500; 588*9371c9d4SSatish Balay y[i] = 10.0500; 589*9371c9d4SSatish Balay t[i++] = 3.7500; 590*9371c9d4SSatish Balay y[i] = 28.9000; 591*9371c9d4SSatish Balay t[i++] = 1.7500; 592*9371c9d4SSatish Balay y[i] = 28.9500; 593*9371c9d4SSatish Balay t[i++] = 1.7500; 594c4762a1bSJed Brown PetscFunctionReturn(0); 595c4762a1bSJed Brown } 596c4762a1bSJed Brown 597*9371c9d4SSatish Balay PetscErrorCode TaskWorker(AppCtx *user) { 598c4762a1bSJed Brown PetscReal x[NPARAMETERS], f = 0.0; 599c4762a1bSJed Brown PetscMPIInt tag = IDLE_TAG; 600c4762a1bSJed Brown PetscInt index; 601c4762a1bSJed Brown MPI_Status status; 602c4762a1bSJed Brown 603c4762a1bSJed Brown PetscFunctionBegin; 6049dddd249SSatish Balay /* Send check-in message to rank-0 */ 605c4762a1bSJed Brown 6069566063dSJacob Faibussowitsch PetscCallMPI(MPI_Send(&f, 1, MPIU_REAL, 0, IDLE_TAG, PETSC_COMM_WORLD)); 607c4762a1bSJed Brown while (tag != DIE_TAG) { 6089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Recv(x, NPARAMETERS, MPIU_REAL, 0, MPI_ANY_TAG, PETSC_COMM_WORLD, &status)); 609c4762a1bSJed Brown tag = status.MPI_TAG; 610c4762a1bSJed Brown if (tag == IDLE_TAG) { 6119566063dSJacob Faibussowitsch PetscCallMPI(MPI_Send(&f, 1, MPIU_REAL, 0, IDLE_TAG, PETSC_COMM_WORLD)); 612c4762a1bSJed Brown } else if (tag != DIE_TAG) { 613c4762a1bSJed Brown index = (PetscInt)tag; 6149566063dSJacob Faibussowitsch PetscCall(RunSimulation(x, index, &f, user)); 6159566063dSJacob Faibussowitsch PetscCallMPI(MPI_Send(&f, 1, MPIU_REAL, 0, tag, PETSC_COMM_WORLD)); 616c4762a1bSJed Brown } 617c4762a1bSJed Brown } 618c4762a1bSJed Brown PetscFunctionReturn(0); 619c4762a1bSJed Brown } 620c4762a1bSJed Brown 621*9371c9d4SSatish Balay PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal *f, AppCtx *user) { 622c4762a1bSJed Brown PetscReal *t = user->t; 623c4762a1bSJed Brown PetscReal *y = user->y; 624c4762a1bSJed Brown #if defined(PETSC_USE_REAL_SINGLE) 625e1dfdf8eSBarry Smith *f = y[i] - exp(-x[0] * t[i]) / (x[1] + x[2] * t[i]); /* expf() for single-precision breaks this example on Freebsd, Valgrind errors on Linux */ 626c4762a1bSJed Brown #else 627c4762a1bSJed Brown *f = y[i] - PetscExpScalar(-x[0] * t[i]) / (x[1] + x[2] * t[i]); 628c4762a1bSJed Brown #endif 629c4762a1bSJed Brown return (0); 630c4762a1bSJed Brown } 631c4762a1bSJed Brown 632*9371c9d4SSatish Balay PetscErrorCode StopWorkers(AppCtx *user) { 633c4762a1bSJed Brown PetscInt checkedin; 634c4762a1bSJed Brown MPI_Status status; 635c4762a1bSJed Brown PetscReal f, x[NPARAMETERS]; 636c4762a1bSJed Brown 637c4762a1bSJed Brown PetscFunctionBegin; 638c4762a1bSJed Brown checkedin = 0; 639c4762a1bSJed Brown while (checkedin < user->size - 1) { 6409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Recv(&f, 1, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PETSC_COMM_WORLD, &status)); 641c4762a1bSJed Brown checkedin++; 6429566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(x, NPARAMETERS)); 6439566063dSJacob Faibussowitsch PetscCallMPI(MPI_Send(x, NPARAMETERS, MPIU_REAL, status.MPI_SOURCE, DIE_TAG, PETSC_COMM_WORLD)); 644c4762a1bSJed Brown } 645c4762a1bSJed Brown PetscFunctionReturn(0); 646c4762a1bSJed Brown } 647c4762a1bSJed Brown 648c4762a1bSJed Brown /*TEST 649c4762a1bSJed Brown 650c4762a1bSJed Brown build: 651c4762a1bSJed Brown requires: !complex 652c4762a1bSJed Brown 653c4762a1bSJed Brown test: 654c4762a1bSJed Brown nsize: 3 655c4762a1bSJed Brown requires: !single 656c4762a1bSJed Brown args: -tao_smonitor -tao_max_it 100 -tao_type pounders -tao_gatol 1.e-5 657c4762a1bSJed Brown 658c4762a1bSJed Brown TEST*/ 659