MCMC.h

Go to the documentation of this file.
00001 ///
00002 /// \file   MCMC.h
00003 /// \brief  Definitions of classes for MCMC evaluation of Bayesian models
00004 ///
00005 /// The ParameterT, StepT, and Model classes defined here are the core classes
00006 /// that do all of the work. To implement a Bayesian model using these classes
00007 /// each parameter in the model should be derived from Paramater and should
00008 /// override llike(), at a minimum. lprior() should be overridden for
00009 /// anything other than a flat prior. (Notice that the prior will be
00010 /// improper unless overriden when the parameter has an unbounded domain.)
00011 /// The model is derived from Model, and each parameter is pushed onto a
00012 /// stack (step_), with a specified Step type (MetroStep, SliceStep, or
00013 /// FunctionStep).
00014 ///
00015 /// \author Kent Holsinger
00016 /// \date   2004-07-03
00017 ///
00018 
00019 // This file is part of MCMC++, a library for constructing C++ programs
00020 // that implement MCMC analyses of Bayesian statistical models.
00021 // Copyright (c) 2004-2006 Kent E. Holsinger
00022 //
00023 // MCMC++ is free software; you can redistribute it and/or modify
00024 // it under the terms of the GNU General Public License as published by
00025 // the Free Software Foundation; either version 2 of the License, or
00026 // (at your option) any later version.
00027 //
00028 // MCMC++ is distributed in the hope that it will be useful,
00029 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00030 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00031 // GNU General Public License for more details.
00032 //
00033 // You should have received a copy of the GNU General Public License
00034 // along with MCMC++; if not, write to the Free Software
00035 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00036 //
00037 
00038 #if !defined(__MCMC_H)
00039 #define __MCMC_H
00040 
00041 // Boost includes
00042 #include <boost/any.hpp>
00043 #include <boost/format.hpp>
00044 #include <boost/progress.hpp>
00045 // local includes
00046 #include "mcmc++/DataTable.h"
00047 #include "mcmc++/lot.h"
00048 #include "mcmc++/util.h"
00049 #include "mcmc++/statistics.h"
00050 
00051 extern const double MCMC_ZERO_FREQ;
00052 
00053 namespace MCMC {
00054   const double MinBetaPar = 1.0e-1;   ///< minimum value allowed for beta parameter
00055   const double MaxBetaPar = 1.0e4;    ///< maximum value allowed for beta parameter
00056   const double MinDirchPar = 1.0e-1;  ///< minimum value allowed for Dirichlet parameter
00057   const double MaxDirchPar = 1.0e4;   ///< maximum value allowed for Dirichlet parameter
00058 }
00059 
00060 lot& GetRNG();
00061 double safeFreq(double p, double zero = MCMC_ZERO_FREQ);
00062 double invSafeFreq(double x, double zero = MCMC_ZERO_FREQ); 
00063 std::vector<double> safeFreq(const std::vector<double>& p,
00064                              const double zero);
00065 std::vector<double> invSafeFreq(const std::vector<double>& x,
00066                                 const double zero = MCMC_ZERO_FREQ);
00067 
00068 double proposeBeta(double mean, double denom, double tolerance);
00069 double logQBeta(double newMean, double oldMean, double denom, 
00070                 double tolerance);
00071 std::vector<double> proposeDirch(const std::vector<double>& mean, double denom,
00072                                  double tolerance);
00073 double logQDirch(const std::vector<double>& newMean, 
00074                  const std::vector<double>& oldMean, 
00075                  double denom, double tolerance);
00076 double proposeNorm(double mean, double variance);
00077 double logQNorm(double newMean, double oldMean, double variance);
00078 double safeBetaPar(double x);
00079 void safeDirchPar(std::vector<double>& x);
00080 
00081 /// \class ParameterBase
00082 /// \brief Pure virtual class provide interface for Parameter
00083 ///
00084 class ParameterBase {
00085 public:
00086   ParameterBase(std::string label);
00087   virtual ~ParameterBase(void);
00088 
00089   virtual void Adapt(const double accept);
00090   void Assign(const boost::any& value);
00091   std::string Label(void) const;
00092   void SetLabel(const std::string& label);
00093   const boost::any Value(void) const;
00094 
00095 private:
00096   std::string label_;
00097 
00098 };
00099 
00100 /// \class ParameterT
00101 /// \brief Base class for model parameters
00102 ///
00103 /// ParameterT is one of the three workhorses in the simulation framework.
00104 /// Each parameter in the statistical model must be derived separately from
00105 /// ParameterT. It is not necessary to declare and define separate classes
00106 /// for parameters that share the same probabilistic structure, e.g., 
00107 /// allele counts in different populations or measurements of yields in
00108 /// different experimental blocks, but each parameter must be pushed
00109 /// separately onto the step_ stack in Model.
00110 ///
00111 /// It is often useful for the derived class to store a pointer to the
00112 /// model of which it is part. This allows the Model class to define access
00113 /// functions to the values of other parameters, facilitating calculation
00114 /// of the full conditionals.
00115 ///
00116 /// lPrior() and llike(), the log prior and log likelihood for a particular 
00117 /// parameter respectively, are used only as a sum. Thus, if it is more
00118 /// convenient to write the full conditional in a single function, either
00119 /// may be used. I typically find it easiest to write lPrior() as expressing
00120 /// the "probability" of the parameter, given hyperparameters on which it
00121 /// depends and llike() as expressing the "probability" of parameters (or 
00122 /// data that depend on this one. But your mileage may vary.
00123 ///
00124 template <typename T>
00125 class ParameterT : public ParameterBase {
00126 public:
00127   /// Constructor
00128   ///
00129   /// \param label    a string identifier for the parameter
00130   ///
00131   ParameterT(std::string label)
00132     : ParameterBase(label)
00133   {}
00134 
00135   /// Destructor
00136   ///
00137   virtual ~ParameterT(void) {}
00138 
00139 public:
00140   // must be public to allow access from Step classes
00141   // cannot be pure virtual, because we want 0 returns unless they
00142   // are overridden
00143 
00144   /// Returns likelihood associated with current parameter
00145   ///
00146   /// \param x  Value of the parameter in the current iteration
00147   ///
00148   virtual double llike(const T x) const {
00149     return 0.0;
00150   }
00151 
00152   /// Returns prior associated with current parameter
00153   ///
00154   /// \param x  Value of the parameter in the current iteration
00155   ///
00156   virtual double lPrior(const T x) const {
00157     return 0.0;
00158   }
00159 
00160   /// Propose a new value for a parameter in an M-H step
00161   ///
00162   /// \param current  Value of the parameter in the current iteration
00163   ///
00164   virtual T propose(const T current) const {
00165     return current;
00166   }
00167 
00168   /// Probability of proposing x, given starting from y
00169   ///
00170   /// \param x
00171   /// \param y
00172   ///
00173   virtual double lQ(const T x, const T y) const {
00174     return 0.0;
00175   }
00176 
00177   /// Assign value
00178   ///
00179   /// \param x  Value to assign to this parameter
00180   void Assign(const T& x) {
00181     x_ = x;
00182   }
00183 
00184   /// Function used to update a deterministic node
00185   ///
00186   virtual const T Function(const bool doCalc = true) const {
00187     return x_;
00188   }
00189 
00190   /// Value of the parameter
00191   ///
00192   virtual const T Value(void) const {
00193     return x_;
00194   }
00195 
00196 private:
00197   T x_;
00198 
00199 };
00200 
00201 
00202 /// \class StepBase
00203 /// \brief Base class for steps associated with different parameter types
00204 ///
00205 class StepBase {
00206 public:
00207   StepBase(ParameterBase* parameter, unsigned long accept, 
00208            unsigned long ct, const double w, const double m, 
00209            const double lowBound, const double highBound);
00210   virtual ~StepBase(void);
00211 
00212   /// Select new parameter from sampler
00213   ///
00214   virtual void DoStep(void) = 0;
00215 
00216   /// Set bounds on parameter value
00217   ///
00218   /// \param low      lower bound
00219   /// \param high     upper bound
00220   ///
00221   virtual void SetBounds(double low, double high) = 0;
00222 
00223   /// Set width of step in slice sampler
00224   ///
00225   /// \param w        width
00226   ///
00227   virtual void SetW(double w) = 0;
00228 
00229   /// Set maximum number of steps in slice sampler
00230   ///
00231   /// \param m        maximum number of steps allowed
00232   ///
00233   virtual void SetM(int m) = 0;
00234 
00235   /// Returns parameter value of the current step
00236   ///
00237   const boost::any aValue(void) const;
00238   const double Value(void) const;
00239   const int iValue(void) const;
00240   const std::vector<double> dVecValue(void) const;
00241   const std::vector<int> iVecValue(void) const;
00242 
00243   int accept(void) const;
00244   std::string Label(void) const;
00245   ParameterBase* Par(void) const;
00246   void ResetAccept(void);
00247   void SetLabel(const std::string& label);
00248 
00249 protected:
00250   ParameterBase* par_;          ///< holds the data and methods
00251   unsigned long accept_;        ///< acceptance count for M-H
00252   unsigned long ct_;            ///< number of choices so far
00253 
00254   double w_;                    ///< slice width
00255   double m_;                    ///< maximum number of steps in slice sampler
00256   double lowBound_;             ///< smallest value of parameter allowed
00257   double highBound_;            ///< largest value of parameter allowed
00258 
00259 };
00260 
00261 /// \class Step
00262 /// \brief Base class for all steps associated with one parameter type
00263 ///
00264 template <typename T>
00265 class Step : public StepBase {
00266 public:
00267   /// Constructor
00268   ///
00269   /// \param parameter  Pointer to the parameter associated with this step
00270   ///
00271   explicit Step(ParameterT<T>* parameter)
00272     : StepBase(parameter, 0, 0, 0.0, 0.0, 0.0, 0.0), rng_(GetRNG()),
00273       parT_(parameter)
00274   {}
00275 
00276   /// Destructor
00277   ///
00278   virtual ~Step(void) {}
00279 
00280 protected:
00281   /// Constructor
00282   ///
00283   /// \param parameter  Pointer to the parameter associated with this step
00284   /// \param w          step width for slice sampler
00285   /// \param m          maximum number of steps for slice sampler
00286   /// \param lowBound   minimum value of parameter allowed
00287   /// \param highBound  maximum value of parameter allowed
00288   ///
00289   Step(ParameterT<T>* parameter, double w, double m, double lowBound,
00290        double highBound)
00291     : StepBase(parameter, 0, 0, w, m, lowBound, highBound), rng_(GetRNG())
00292   {}
00293 
00294   lot& rng_;             ///< shared random number generator
00295   ParameterT<T>* parT_;  ///< pointer to parameter associated with this step
00296 
00297 };
00298 
00299 /// \class MetroStepT
00300 /// \brief Implements Metropolis-Hastings step for a parameter
00301 ///
00302 template <typename T>
00303 class MetroStepT : public Step<T> {
00304 public:
00305   /// Constructor
00306   ///
00307   /// \param parameter  Pointer to the parameter associated with this step
00308   ///
00309   explicit MetroStepT(ParameterT<T>* parameter)
00310     : Step<T>(parameter), met_(parameter), 
00311       llike_(&ParameterT<T>::llike), lPrior_(&ParameterT<T>::lPrior),
00312       propose_(&ParameterT<T>::propose), lQ_(&ParameterT<T>::lQ)
00313   {}
00314 
00315 protected:
00316   /// Accept or reject
00317   ///
00318   /// \param newX  Proposed value
00319   /// \param oldX  Old value
00320   ///
00321   /// \returns true  for accept
00322   /// \returns false for reject
00323   ///
00324   bool Accept(T& newX, T& oldX) {
00325     double piNew = (met_->*llike_)(newX) + (met_->*lPrior_)(newX);  
00326     double piOld = (met_->*llike_)(oldX) + (met_->*lPrior_)(oldX);
00327     double qNew = (met_->*lQ_)(newX, oldX);
00328     double qOld = (met_->*lQ_)(oldX, newX);
00329     //          log((pi(Y)q(X|Y))/(pi(X)q(Y|X)))
00330     double la = piNew + qOld - piOld - qNew;
00331     if (Step<T>::rng_.uniform() < exp(la)) {
00332       return true;
00333     } else {
00334       return false;
00335     }
00336   }
00337 
00338   /// Assign value to parameter associated with this step
00339   ///
00340   /// \param x  The value to assign
00341   ///
00342   void Assign(const T& x) {
00343     met_->Assign(x);
00344   }
00345 
00346   /// Get a new value from an M-H step
00347   ///
00348   virtual void DoStep(void) {
00349     T val = boost::any_cast<T>(met_->Value());
00350     T xTry = (met_->*propose_)(val);
00351     if (Accept(xTry, val)) {
00352       ++Step<T>::accept_;
00353       met_->Assign(xTry);
00354     } else {
00355       met_->Assign(val);
00356     }
00357   }
00358 
00359 protected:
00360   ParameterT<T>* met_;   ///< pointer to parameter associated with this step
00361 
00362 private:
00363   // no reason to call these
00364 
00365   /// Value of parameter associated with this step
00366   ///
00367   const T Value(void) const {
00368     return met_->Value();
00369   }
00370 
00371   /// Set bounds on parameter value 
00372   ///
00373   /// \param low    lower bound
00374   /// \param high   upper bound
00375   ///
00376   virtual void SetBounds(double low, double high) 
00377   {}
00378 
00379   /// Set width of step for slice sampler (has no effect in MetroStep)
00380   ///
00381   /// \param w    width
00382   ///
00383   virtual void SetW(double w) 
00384   {}
00385 
00386   /// Set number of steps allowed in slice sampler (has no effect in MetroStep)
00387   ///
00388   /// \param m    maximum number of steps allowed
00389   ///
00390   virtual void SetM(int m) 
00391   {}
00392 
00393   /// typedef for convenient access to function pointer
00394   ///
00395   typedef double(ParameterT<T>::*llikePtr)(const T) const;
00396   /// typedef for convenient access to function pointer
00397   ///
00398   typedef double(ParameterT<T>::*lPriorPtr)(const T) const;
00399   /// typedef for convenient access to function pointer
00400   ///
00401   typedef T(ParameterT<T>::*proposePtr)(const T) const;
00402   /// typedef for convenient access to function pointer
00403   ///
00404   typedef double(ParameterT<T>::*lQPtr)(const T, const T) const;
00405 
00406   const llikePtr llike_;        ///< pointer to likelihood
00407   const lPriorPtr lPrior_;      ///< pointer to prior
00408   const proposePtr propose_;    ///< pointer to proposal
00409   const lQPtr lQ_;              ///< pointer to q()
00410 
00411 };
00412 
00413 /// \class AdaptMetroStepT
00414 /// \brief Implements Metropolis-Hastings step for a parameter
00415 ///
00416 /// This version allows an adaptive phase to be included in during the
00417 /// burnin
00418 ///
00419 template <typename T>
00420 class AdaptMetroStepT : public MetroStepT<T> {
00421 public:
00422   /// Constructor
00423   ///
00424   /// \param parameter  Pointer to the parameter associated with this step
00425   /// \param nBurnin    Number of iterations in burn in
00426   /// \param adapt      Number of iterations for adaptation
00427   /// \param interval   Number of iterations between adaptive adjustments
00428   ///
00429   explicit AdaptMetroStepT(ParameterT<T>* parameter, 
00430                  const unsigned long nBurnin, 
00431                  const unsigned long adapt,
00432                  const unsigned long interval)
00433     : MetroStepT<T>(parameter),
00434       adapt_((nBurnin < 5000) ? 0: adapt),
00435       interval_(interval), ct_(0)
00436   {}
00437 
00438   /// Simple modification to allow adaptive phase of M-H sampling
00439   ///
00440   void DoStep(void) {
00441     T current = MetroStepT<T>::met_->Value();
00442     T xTry = MetroStepT<T>::met_->propose(current);
00443     if (Accept(xTry, current)) {
00444       ++Step<T>::accept_;
00445       MetroStepT<T>::met_->Assign(xTry);
00446     } else {
00447       MetroStepT<T>::met_->Assign(current);
00448     }
00449     ++ct_;
00450     if ((ct_ < adapt_) && ((ct_ % interval_) == 0)) {
00451       MetroStepT<T>::met_->Adapt(static_cast<double>(Step<T>::accept_)/ct_);
00452     }
00453   }
00454 
00455 private:
00456   /// Set bounds on parameter value 
00457   ///
00458   /// \param low    lower bound
00459   /// \param high   upper bound
00460   ///
00461   virtual void SetBounds(double low, double high) 
00462   {}
00463 
00464   /// Set width of step for slice sampler (has no effect in MetroStep)
00465   ///
00466   /// \param w    width
00467   ///
00468   virtual void SetW(double w) 
00469   {}
00470 
00471   /// Set number of steps allowed in slice sampler (has no effect in MetroStep)
00472   ///
00473   /// \param m    maximum number of steps allowed
00474   ///
00475   virtual void SetM(int m) 
00476   {}
00477 
00478   const unsigned long adapt_;        // number of iterations for adaptive phase
00479   const unsigned long interval_;     // interval for adjusting M-H parameters
00480   unsigned long ct_;
00481 
00482 };
00483 
00484 
00485 /// \class SliceStep
00486 /// \brief Implements a slice sampler
00487 ///
00488 /// Note: only univariate slices are supported
00489 ///
00490 /// ParameterT constructors must provide appropriate step width,
00491 /// parameter->W(), and number of tries, parameter->M(), if defaults 
00492 /// (1 and MUnbounded, respectively) are to be avoided.
00493 /// lowBound_ and highBound_ defaults must be reset if parameter range is
00494 /// bounded, e.g., frequencies in [0,1].
00495 ///
00496 class SliceStep : public Step<double> {
00497 public:
00498   explicit SliceStep(ParameterT<double>* parameter);
00499 
00500   void SetBounds(double low, double high);
00501   void SetW(double w);
00502   void SetM(int m);
00503 
00504 protected:
00505   virtual void DoStep(void);
00506 
00507 private:
00508   void Assign(const double x);
00509   void SetSlice(const double x);
00510   double Sample(const double x);
00511   const double Value(void) const; 
00512 
00513   double r_;                    // lower limit of slice
00514   double l_;                    // upper limit of slice
00515   double z_;
00516   unsigned long ct_;            // number of choices so far
00517 
00518   static const double DefaultW;
00519   static const long MUnbounded;
00520 
00521   typedef double(ParameterT<double>::*llikePtr)(const double) const;
00522   typedef double(ParameterT<double>::*lPriorPtr)(const double) const;
00523 
00524   ParameterT<double>* dbl_;
00525   llikePtr llike_;
00526   lPriorPtr lPrior_;
00527 
00528 };
00529 
00530 /// \class FunctionStepT
00531 /// \brief Implements a deterministic node
00532 ///
00533 /// Often parameters of interest are deterministic functions of other
00534 /// statistical parameters. A FunctionStep allows us to express that
00535 /// relationship. DoStep() simply invokes the Function() associated with
00536 /// this parameter and returns the result.
00537 ///
00538 template<typename T>
00539 class FunctionStepT : public Step<T> {
00540 public:
00541   /// Constructor
00542   ///
00543   /// \param parameter  Pointer to parameter associated with this step
00544   ///
00545   explicit FunctionStepT(ParameterT<T>* parameter)
00546     : Step<T>(parameter), func_(parameter)
00547   {}
00548 
00549   /// Return the value associated with this node
00550   ///
00551   /// The value is returned by the Function() associated with this paramter.
00552   ///
00553   const T Value(void) {
00554     return func_->Function();
00555   }
00556 
00557 private:
00558   /// Return the value associated with this node
00559   ///
00560   /// The value is returned by the Function() associated with this paramter.
00561   ///
00562   void DoStep(void) {
00563     func_->Assign(func_->Function());
00564   }
00565 
00566   // no reason to call these
00567 
00568   /// Sets bounds for the parameter (empty for FunctionStep)
00569   ///
00570   /// \param low     lower bound
00571   /// \param high    upper bound
00572   ///
00573   void SetBounds(const double low, const double high) 
00574   {}
00575 
00576   /// Sets width for slice sampler (empty for FunctionStep)
00577   ///
00578   /// \param w       step width
00579   ///
00580   void SetW(const double w) 
00581   {}
00582 
00583   /// Sets maximum number of steps for slice sampler (empty for FunctionStep)
00584   ///
00585   /// \param m       maximum number of steps allowed
00586   ///
00587   void SetM(const int m)
00588   {}
00589 
00590   ParameterT<T>* func_;
00591 
00592 };
00593 
00594 typedef std::vector<StepBase*> ModelSteps;///< pointer to each Step in the Model
00595 typedef std::vector<boost::any> SampleVector;   ///< parameters at one iteration
00596 typedef SampleVector::const_iterator SampleIter; ///< paramter iterator
00597 typedef std::vector<SampleVector> Results; ///< SampleVectors for all iterations
00598 typedef Results::const_iterator ResultsIter; ///< iterator over iterations
00599 
00600 /// \class Model
00601 /// \brief Implements the statistical model
00602 ///
00603 /// To build a model, derive a new class from Model. In its constructor,
00604 /// push Steps of the appropriate type onto step_. If you're happy with
00605 /// the default reports, the only other thing you'll need to do is to provide
00606 /// accessor functions to values of the parameters in step_ (or make them
00607 /// public so that Parameters can access them directly).
00608 /// 
00609 /// If you want to calculate DIC for the model, you'll have to provide an
00610 /// appropriate override for Llike(), but everything else will be taken
00611 /// care of automatically.
00612 ///
00613 class Model {
00614 public:
00615   virtual ~Model(void);
00616 
00617   void Simulation(std::ostream& outf, bool interimReport);
00618   virtual void Summarize(int i, std::ostream& outf);
00619   std::string Label(int i) const;
00620   void ReportDic(std::ostream& outf);
00621 
00622   // must be public to allow overrides for specific models
00623   // cannot be pure virtual, because the default is reasonable in
00624   // most cases
00625   virtual void Record(const SampleVector& p);
00626   virtual void InterimReport(std::ostream& outf, std::string header, 
00627                              int progress, int goal);
00628   virtual void ReportHead(std::ostream& outf);
00629 
00630   virtual void Report(std::ostream& outf, const int lastPar = -1);
00631   virtual double Llike(const SampleVector& par) const;
00632   void SetBurnIn(const int nBurnin);
00633   void SetSample(const int nSample);
00634   void SetThin(const int thin);
00635 
00636 protected:
00637   Model(int nBurnin, int nSample, int nThin, bool calc = false, 
00638         bool useMedian = false);
00639 
00640   double percent(int k, int n) const;
00641   double percent(const std::vector<int>& x, int n) const;
00642   double percent(const std::vector<std::vector<int> >& x, int n) const;
00643 
00644   int nBurnin_;                 ///< number of iterations for burn in
00645   int nSample_;                 ///< number of iterations for sample
00646   int nThin_;                   ///< number of iterations between saving results
00647   unsigned nElem_;              ///< number of parameters in the model
00648   SimpleStatistic likelihood_;  ///< stores likelihood for DIC calculations
00649 
00650   ModelSteps step_;             ///< vector of parameters to sample
00651   Results results_;             ///< vector (boost::any) of stored results
00652   virtual SampleVector Parameters(void) const;
00653 
00654   boost::format* summaryFormat_; ///< boost::format for summary output
00655 
00656 private:
00657   void Reset(void);
00658   void Generation(void);
00659 
00660   boost::progress_display* pd_;
00661   bool calculateLikelihood_;
00662   bool useMedian_;
00663 
00664 };
00665 
00666 /// typedef for convenienc access to double paramters
00667 ///
00668 typedef ParameterT<double> Parameter;
00669 /// typedef for convenienc access to double paramters
00670 ///
00671 typedef FunctionStepT<double> FunctionStep;
00672 /// typedef for convenienc access to double paramters
00673 ///
00674 typedef MetroStepT<double> MetroStep;
00675 
00676 #endif
00677 
00678 // Local Variables: //
00679 // mode: c++ //
00680 // End: //

Generated on Tue Mar 27 16:03:38 2007 for mcmc by  doxygen 1.5.1