GeneticAlgoithm
Implementation of the genetic algorithm
runGA.cxx
Go to the documentation of this file.
1 /**
2  * @file
3  */
4 
5 #include <iostream>
6 
7 #include <TRandom3.h>
8 
9 #include "ParametricModel.h"
10 #include "ParametricModelPopulation.h"
11 #include "Chi2FitFigureOfMerit.h"
12 #include "GeneticAlgorithm.h"
13 #include "optparse.h"
14 
15 #include <TH1.h>
16 #include <TMath.h>
17 #include <TCanvas.h>
18 #include <TGraph.h>
19 #include <TSystem.h>
20 #include <TStyle.h>
21 #include <TLatex.h>
22 #include <TString.h>
23 #include <TLegend.h>
24 
25 void parseCommandLine(Config &config, int argc, char **argv);
26 
27 /**
28  * @defgroup runGA Demo Program
29  *
30  * @brief Demo Program.
31  *
32  * @b Objective: Use the Genetic Algorithm to fit a gaussian probability density function to a data distribution.
33  *
34  * @{
35  */
36 
37 /**
38  * @brief Main function
39  *
40  * This program performs the following tasks:
41  * - Parse the command line and defines configuration.
42  * - Generates a dataset following a gaussian distribution.
43  * - Fits the generated distribution using ROOT's implementation.
44  * - Fits the generated distribution using our GA implementation.
45  * - Perform some tests about the behavior of our GA algorithm.
46  * - Plot the results of the main algorithm and the tests.
47  *
48  * Full documentation of the algorithms is available in @ref index.
49  *
50  * @param argc Number of command line arguments.
51  * @param argv Array of command line arguments.
52  * @return 0 upon successfull exit
53  */
54 int main(int argc, char **argv) {
55 
56  //
57  // Initialize program settings
58  //
59  Config config;
60  parseCommandLine(config, argc, argv);
61 
62  std::cout << "Algorithm Configuration:" << std::endl
63  << " ==> nmc = " << (int)config.get("nmc") << std::endl
64  << " ==> acceptThreshold = " << (double)config.get("acceptThreshold") << std::endl
65  << " ==> mutateRate = " << (double)config.get("mutateRate") << std::endl
66  << " ==> mutateSize = " << (double)config.get("mutateSize") << std::endl
67  << " ==> maxGenerations = " << (int)config.get("maxGenerations") << std::endl
68  << " ==> populationSize = " << (int)config.get("populationSize") << std::endl;
69 
70  //
71  // Generates a dataset following a gaussian distribution.
72  //
73  int nmc = config.get("nmc");
74  double mean = config.get("mean");
75  double sigma = config.get("sigma");
76  TRandom3 rnd(1234);
77  double xmin = mean-5*sigma;
78  double xmax = mean+5*sigma;
79  int nbins = 100;
80  TH1 *hData = new TH1F("hData", "", nbins, xmin, xmax);
81  double dx = (xmax-xmin)/nbins;
82  hData->Sumw2();
83  hData->GetXaxis()->SetTitle("x");
84  hData->GetYaxis()->SetTitle("Probability density");
85  for(int mc=0; mc<nmc; mc++) {
86  hData->Fill(rnd.Gaus(mean, sigma), 1./(nmc*dx));
87  }
88 
89  //
90  // Perform a likelihood fit using ROOT's implementation.
91  //
92  TF1 *likelihoodFit = new TF1("likelihoodFit", "gaus", xmin, xmax);
93  hData->Fit(likelihoodFit, "NOQ");
94 
95  //
96  // Initialize the figure of merit and pass the data.
97  //
99  fom.setAcceptThreshold(config.get("acceptThreshold"));
100  std::vector<double> x(1);
101  for(int bin=1; bin<=hData->GetNbinsX(); bin++) {
102  x[0] = hData->GetBinCenter(bin);
103  fom.addData(x, hData->GetBinContent(bin), hData->GetBinError(bin));
104  }
105 
106  //
107  // Configure the population to be optimized
108  //
109  ParametricModelPopulation population;
110  population.setMutateRate(config.get("mutateRate"));
111  population.setMutationSize(config.get("mutateSize"));
112  population.setFigureOfMerit(&fom);
113  TF1 *f = new TF1("f", "gaus", xmin, xmax);
114  f->SetParameter(0, 1./(sigma*sqrt(2*TMath::Pi())));
115  f->SetParameter(1, mean);
116  f->SetParameter(2, sigma);
117  f->SetParLimits(0, 0.001, 1);
118  f->SetParLimits(1, -10, 10);
119  f->SetParLimits(2, 0.001, 10);
120  f->SetParName(0, "Constant");
121  f->SetParName(1, "Mean");
122  f->SetParName(2, "Sigma");
123  population.setFormula(f);
124 
125  std::cout << "Input parameters:" << std::endl;
126  for(int i=0; i<f->GetNpar(); i++) {
127  double pmin, pmax;
128  f->GetParLimits(i, pmin, pmax);
129  std::cout << " ==> " << f->GetParName(i) << " : " << f->GetParameter(i)
130  << " - range: [" << pmin << ", " << pmax << "]"
131  << std::endl;
132  }
133 
134  //
135  // Configure the Genetic Algorithm
136  //
137  GeneticAlgorithm alg;
138  alg.setNGenerationsMax(config.get("maxGenerations"));
139  alg.setPopulationSize(config.get("populationSize"));
140 
141  //
142  // Prepare for making plots
143  //
144  gSystem->Exec("rm -rf figures");
145  gSystem->Exec("mkdir -p figures");
146  gStyle->SetOptStat(0); // disables stat box on plots
147  TGraph *gScore = new TGraph();
148  TGraph *gRMS = new TGraph();
149  TCanvas *C_anim = new TCanvas("C_anim", "C_anim");
150  hData->SetMarkerStyle(20);
151  likelihoodFit->SetLineColor(4);
152  likelihoodFit->SetLineStyle(7);
153  f->SetLineColor(2);
154  TLegend *L_anim = new TLegend(0.7, 0.7, 0.9, 0.9);
155  L_anim->AddEntry(hData, "Data", "lp");
156  L_anim->AddEntry(likelihoodFit, "Likelihood fit", "l");
157  L_anim->AddEntry(f, "GA fit", "l");
158  TLatex *lt_anim = new TLatex();
159 
160  //
161  // Run the GA and the tests if requested
162  //
163  alg.initialize(&population);
164  do {
165  if(config.get("runTests")) {
166  ParametricModel *bestModel = (ParametricModel*)population.getBestFitted();
167  gScore->SetPoint(gScore->GetN(), alg.getCurrentGeneration(), bestModel->getScore());
168  gRMS->SetPoint(gRMS->GetN(), alg.getCurrentGeneration(), population.getScoreRMS()/bestModel->getScore());
169  C_anim->cd();
170  hData->Draw();
171  likelihoodFit->Draw("same");
172  bestModel->getFormula()->SetLineColor(2);
173  bestModel->getFormula()->Draw("same");
174  L_anim->Draw();
175  lt_anim->SetText(xmin + (xmax-xmin)/10., hData->GetMaximum()*0.9, TString::Format("Generation: %d", alg.getCurrentGeneration()));
176  lt_anim->Draw();
177  C_anim->Modified();
178  C_anim->Update();
179  C_anim->SaveAs("figures/C_anim.gif+25");
180  std::cout << "\rGeneration: " << alg.getCurrentGeneration();
181  std::flush(std::cout);
182  }
183  } while(alg.nextGeneration());
184  if(config.get("runTests")) {
185  std::cout << std::endl;
186  }
187 
188  //
189  // Display results and make plots
190  //
191  ParametricModel *bestModel = (ParametricModel*)population.getBestFitted();
192  TF1 *bestFormula = bestModel->getFormula();
193 
194  std::cout << "Done after " << alg.getCurrentGeneration() << " generations." << std::endl
195  << " ==> Best score is: " << bestModel->getScore() << std::endl;
196 
197  std::cout << "After GA fit: " << std::endl;
198  for(int i=0; i<bestFormula->GetNpar(); i++) {
199  std::cout << " ==> " << f->GetParName(i) << " : " << bestFormula->GetParameter(i) << std::endl;
200  }
201 
202  std::cout << "After Likelihood fit: " << std::endl;
203  for(int i=0; i<likelihoodFit->GetNpar(); i++) {
204  std::cout << " ==> " << f->GetParName(i) << " : " << likelihoodFit->GetParameter(i) << std::endl;
205  }
206 
207  TCanvas *C_fit = new TCanvas("C_fit", "C_fit");
208  C_fit->cd();
209  hData->Draw("ep");
210  likelihoodFit->Draw("same");
211  bestFormula->SetLineColor(2);
212  bestFormula->Draw("same");
213  L_anim->Draw();
214  lt_anim->SetText(xmin + (xmax-xmin)/10., hData->GetMaximum()*0.9, TString::Format("Generation: %d", alg.getCurrentGeneration()));
215  lt_anim->Draw();
216  C_fit->SaveAs("figures/C_fit.png");
217  C_fit->SaveAs("figures/C_fit.root");
218 
219  if(config.get("runTests")) {
220  C_fit->SaveAs("figures/C_anim.gif++300");
221 
222  TCanvas *C_Score = new TCanvas("C_Score", "C_Score");
223  C_Score->cd()->SetLogx();
224  C_Score->cd()->SetLogy();
225  gScore->SetLineColor(4);
226  gScore->SetLineWidth(2);
227  gScore->Draw("AL");
228  C_Score->SaveAs("figures/C_Score.png");
229  C_Score->SaveAs("figures/C_Score.root");
230 
231  TCanvas *C_RMS = new TCanvas("C_RMS", "C_RMS");
232  C_RMS->cd()->SetLogx();
233  C_RMS->cd()->SetLogy();
234  gRMS->SetLineColor(4);
235  gRMS->SetLineWidth(2);
236  gRMS->Draw("AL");
237  C_RMS->SaveAs("figures/C_RMS.png");
238  C_RMS->SaveAs("figures/C_RMS.root");
239  }
240 
241  return 0;
242 }
243 
244 
245 /**
246  * @brief Prase command line arguments.
247  *
248  * @param config Configuration to parse into.
249  * @param argc Number of command line arguments.
250  * @param argv Array of command line arguments.
251  *
252  * #### Configuration details:
253  */
254 void parseCommandLine(Config &config, int argc, char **argv)
255 {
256 
257  optparse::OptionParser parser = optparse::OptionParser().description("Point Cloud Analysis");
258 
259  /** - @b -n, <b> \-\-nmc </b> Number of toy MC experiments used to build the dataset.*/
260  parser.add_option("-n", "--nmc").action("store").dest("nmc").set_default(10000)
261  .help("Number of toy MC experiments used to build the dataset.");
262 
263  /** - @b -m, <b> \-\-mean </b> Mean of the gaussian distribution used to generate the dataset. */
264  parser.add_option("-m", "--mean").action("store").dest("mean").set_default(1.5)
265  .help("Mean of the gaussian distribution used to generate the dataset.");
266 
267  /** - @b -s, <b> \-\-sigma </b> Width (sigma) of the gaussian distribution used to generate the dataset. */
268  parser.add_option("-s", "--sigma").action("store").dest("sigma").set_default(2.3)
269  .help("Width (sigma) of the gaussian distribution used to generate the dataset.");
270 
271  /** - @b -a, <b> \-\-acceptThreshold </b> Score threshold to accept a model as a final answer. */
272  parser.add_option("-a", "--acceptThreshold").action("store").dest("acceptThreshold").set_default(0.85)
273  .help("Score threshold to accept a model as a final answer.");
274 
275  /** - @b -R, <b> \-\-mutateRate </b> Rate at which models are subjected to mutation. */
276  parser.add_option("-R", "--mutateRate").action("store").dest("mutateRate").set_default(0.01)
277  .help("Rate at which models are subjected to mutation.");
278 
279  /** - @b -S, <b> \-\-mutateSize </b> Relative size of the mutation whenever applied. */
280  parser.add_option("-S", "--mutateSize").action("store").dest("mutateSize").set_default(0.1)
281  .help("Relative size of the mutation whenever applied.");
282 
283  /** - @b -G, <b> \-\-maxGenerations </b> Maximum number of generations before aborting the optimization loop. */
284  parser.add_option("-G", "--maxGenerations").action("store").dest("maxGenerations").set_default(10000)
285  .help("Maximum number of generations before aborting the optimization loop.");
286 
287  /** - @b -N, <b> \-\-populationSize </b> Size of the population to be evolved. */
288  parser.add_option("-G", "--populationSize").action("store").dest("populationSize").set_default(500)
289  .help("Size of the population to be evolved.");
290 
291  /** - @b -t, <b> \-\-runTests </b> Run tests alongside the main algorithm. */
292  parser.add_option("-t", "--runTests").action("store_true").dest("runTests").set_default(false)
293  .help("Run tests alongside the main algorithm.");
294 
295  config = parser.parse_args(argc, argv);
296 }
void setAcceptThreshold(double acceptThreshold)
void setMutationSize(double relativeSize)
IModel * getBestFitted(int rank=0)
Implements a population of parametric models.
void addData(const std::vector< double > &x, double y, double ey)
Copyright (C) 2010 Johannes Weißl jargon@molb.org License: your favourite BSD-style license...
void setMutateRate(double rate)
Definition: IPopulation.cxx:79
void setFigureOfMerit(IFigureOfMerit *fom)
Class implementing a figure of merit.
double getScore() const
Definition: IModel.cxx:16
void initialize(IPopulation *population)
int main(int argc, char **argv)
Main function.
Definition: runGA.cxx:54
void setNGenerationsMax(int generationsMax)
void parseCommandLine(Config &config, int argc, char **argv)
Prase command line arguments.
Definition: runGA.cxx:254
Class imlementing the Genetic Algorithm.
void setPopulationSize(int populationSize)
Class representing a model defined by a parametric function.
double getScoreRMS()