DataTable.h

Go to the documentation of this file.
00001 ///
00002 /// \file   DataTable.h
00003 /// \brief  Provides DataTable class for access to tabular data
00004 ///
00005 /// The DataTable class reads homogeneous tabular data, i.e., numerical data 
00006 /// that is either all of the same type or that can be converted to the base
00007 /// type of the data table using standard conversions. Rows or columns or both
00008 /// can be labeled, but labels are not required.
00009 ///
00010 /// \author Kent Holsinger
00011 /// \date   2004-06-26
00012 ///
00013 
00014 // This file is part of MCMC++, a library for constructing C++ programs
00015 // that implement MCMC analyses of Bayesian statistical models.
00016 // Copyright (c) 2004-2006 Kent E. Holsinger
00017 //
00018 // MCMC++ is free software; you can redistribute it and/or modify
00019 // it under the terms of the GNU General Public License as published by
00020 // the Free Software Foundation; either version 2 of the License, or
00021 // (at your option) any later version.
00022 //
00023 // MCMC++ is distributed in the hope that it will be useful,
00024 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00025 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00026 // GNU General Public License for more details.
00027 //
00028 // You should have received a copy of the GNU General Public License
00029 // along with MCMC++; if not, write to the Free Software
00030 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00031 //
00032 
00033 #if !defined(__DATATABLE_H)
00034 #define __DATATABLE_H
00035 
00036 // standard includes
00037 #include <algorithm>
00038 #include <fstream>
00039 #include <iomanip>
00040 #include <iostream>
00041 #include <string>
00042 #include <vector>
00043 // boost includes
00044 #include <boost/tokenizer.hpp>
00045 #include <boost/spirit/core.hpp>
00046 #include <boost/spirit/utility.hpp>
00047 // local includes
00048 #include "mcmc++/util.h"
00049 
/// \enum DataTableResult
/// \brief Status codes describing the outcome of reading a data table
///
/// readSuccess is zero; every other enumerator names a distinct kind of
/// failure.
///
enum DataTableResult {
  readSuccess   = 0,  ///< the read completed without error
  labelError    = 1,  ///< column labels could not be read
  valueError    = 2,  ///< data values could not be read
  openError     = 3,  ///< the input file could not be opened
  notEmptyError = 4,  ///< the table already held data when a read was requested
  grammarError  = 5   ///< grammar-related failure (not produced in this header)
};
00063 
/// Forward declaration of the DataTableGrammar class template. It is only
/// declared here; no definition appears in this header.
template <class Type> class DataTableGrammar;
00066 
/// \class BadCol
/// \brief Empty tag type used as the exception for an out-of-range column
///        index
///
class BadCol {
};
00071 
/// \class BadRow
/// \brief Empty tag type used as the exception for an out-of-range row
///        index
///
class BadRow {
};
00076 
/// argCheck_ is a compile-time switch that controls whether row and column
/// indexes are bounds checked before use.
///
/// It expands to 1 (checking enabled) unless NDEBUG is defined, in which
/// case it expands to 0 (checking disabled).
///
#ifndef NDEBUG
#  define argCheck_ 1
#else
#  define argCheck_ 0
#endif
00087 
00088 
00089 /// \class DataTable
00090 /// \brief Provides access to homogeneous tabular data
00091 ///
00092 /// The DataTable class reads homogeneous tabular data, i.e., numerical data 
00093 /// that is either all of type Type or that can be converted to Type using
00094 /// standard conversions. Rows or columns or both can be labeled, but labels 
00095 /// are not required. A simple method for stream output of errors is also 
00096 /// provided.
00097 ///
00098 template <typename Type>
00099 class DataTable {
00100   enum {
00101     defaultWidth = 14,      ///< default width of field in output
00102     defaultColumnSpace = 2  ///< default number of spaces between output columns
00103   };
00104 
00105 public:
00106   /// Constructor -- no default constructor is provided.
00107   ///
00108   /// \param columnLabels   Are columns labeled?
00109   /// \param rowLabels      Are rows labeled?
00110   ///
00111   /// Initializes data structures. Use Read() to collect the data.
00112   ///
00113   DataTable(const bool columnLabels = true, const bool rowLabels = false)
00114     : width_(defaultWidth), nRows_(0), nCols_(0), columnLabels_(columnLabels), 
00115       rowLabels_(rowLabels)
00116   {}
00117 
00118   /// Read data from a file.
00119   ///
00120   /// \param fileName       The name of the file from which data is to be read
00121   /// \return notEmptyError If the DataTable is not empty
00122   /// \return labelError    If there is an error reading column labels
00123   /// \return valueError    If there is an error reading values
00124   /// \return openError     If filename could not be opened for reading
00125   /// \return readSuccess   If everything works
00126   ///
00127   /// The DataTable must be empty for data to be read. If it has been used
00128   /// before, Flush() must be used to re-initialize the internal state.
00129   enum DataTableResult Read(const std::string fileName) {
00130     enum DataTableResult result = readSuccess; // readSuccess == 0
00131     if ((nRows_ > 0) || (nCols_ > 0)) { // explicit flush required
00132       result = notEmptyError;
00133     }
00134     std::ifstream input(fileName.c_str());
00135     if (input) {
00136       if (columnLabels_ && !ReadLabels(input)) {
00137         result = labelError;
00138       } else if (!ReadValues(input)) {
00139         result = valueError;
00140       }
00141       input.close();
00142     } else {
00143       result = openError;
00144     }
00145     return result;
00146   }
00147 
00148   /// Sets width of output based on length of string
00149   ///
00150   /// \param s   The string used to set the width
00151   ///
00152   void SetWidth(const std::string s) {
00153     if (boost::is_integral<Type>::value) {
00154       width_ = 4;
00155     } else {
00156       width_ = std::max(width_, s.length() + defaultColumnSpace);
00157     }
00158   }
00159 
00160   /// Value of the data at specified row and column
00161   ///
00162   /// \param row   Index of the data row
00163   /// \param col   Index of the data column
00164   ///
00165   inline Type Value(const unsigned row, const unsigned col) const {
00166     Util::Assert<BadRow>(!argCheck_ || ((row < nRows_) && (row >= 0)));
00167     Util::Assert<BadCol>(!argCheck_ || ((col < nCols_) && (col >= 0)));
00168     return data_[row][col];
00169   }
00170 
00171   /// Set value of the data at specified row and column
00172   ///
00173   /// \param row   Index of the data row
00174   /// \param col   Index of the data column
00175   /// \param value Value to be inserted
00176   ///
00177   inline void SetValue(const unsigned row, const unsigned col,
00178                        const Type value)
00179   {
00180     Util::Assert<BadRow>(!argCheck_ || ((row < nRows_) && (row >= 0)));
00181     Util::Assert<BadCol>(!argCheck_ || ((col < nCols_) && (col >= 0)));
00182     data_[row][col] = value;
00183   }
00184 
00185   /// Label associated with a particular column index 
00186   ///
00187   /// \param index   column index
00188   ///
00189   inline std::string ColumnLabel(const unsigned index) const {
00190     Util::Assert<BadCol>(!argCheck_ 
00191                          || ((index < nLabelCols_) && (index >= 0)));
00192     return cLabels_.at(index);
00193   }
00194 
00195   /// Label associated with a particular row index 
00196   ///
00197   /// \param index   row index
00198   ///
00199   inline std::string RowLabel(const unsigned index) const {
00200     Util::Assert<BadRow>(!argCheck_ || ((index < nRows_) && (index >= 0)));
00201     return rLabels_.at(index);
00202   }
00203 
00204   /// An entire row of the data matrix
00205   ///
00206   /// \param row   Index of the data row
00207   ///
00208   std::vector<Type> RowVector(const unsigned row) const {
00209     Util::Assert<BadRow>(!argCheck_ || ((row < nRows_) && (row >= 0)));
00210     return data_[row];
00211   }
00212 
00213   /// An entire column of the data matrix
00214   ///
00215   /// \param col   Index of the data column
00216   ///
00217   std::vector<Type> ColumnVector(const unsigned col) const {
00218     Util::Assert<BadCol>(!argCheck_ || ((col < nCols_) && (col >= 0)));
00219     std::vector<Type> x(nRows_);
00220     for (unsigned i = 0; i < nRows_; ++i) {
00221       x[i] = Value(i, col);
00222     }
00223     return x;
00224   }
00225 
00226   /// Print the table to the specified stream 
00227   ///
00228   /// \param out   The stream for output (defaults to std::cout)
00229   ///
00230   void PrintTable(std::ostream& out = std::cout) {
00231     if (columnLabels_) {
00232       PrintLabels(out);
00233     }
00234     for (unsigned i = 0; i < nRows_; ++i) {
00235       if (rowLabels_) {
00236         out << RowLabel(i) << ": ";
00237       }
00238       PrintValueRow(out, i);
00239     }
00240   }
00241 
00242   /// Re-initialize internal data structures.
00243   ///
00244   void Flush(void) {
00245     cLabels_.clear();
00246     rLabels_.clear();
00247     data_.clear();
00248     nRows_ = nCols_ = nLabelCols_ = 0;
00249   }
00250 
00251   /// Number of rows in the data
00252   ///
00253   inline unsigned Rows(void) const {
00254     return nRows_;
00255   }
00256 
00257   /// Number of columns in the data
00258   ///
00259   inline unsigned Columns(void) const {
00260     return nCols_;
00261   }
00262 
00263   /// Number of column labels
00264   ///
00265   inline unsigned ColumnLabels(void) const {
00266     return nLabelCols_;
00267   }
00268 
00269   /// Set all data elements to zero
00270   ///
00271   void SetZero(void) {
00272     for (unsigned i = 0; i < nRows_; ++i) {
00273       for (unsigned j = 0; j < nCols_; ++j) {
00274         data_[i][j] = 0;
00275       }
00276     }
00277   }
00278 
00279 private:
00280 
00281   bool ReadLabels(std::istream& in) {
00282     using namespace boost;
00283     
00284     std::string s;
00285     std::getline(in, s);
00286     typedef tokenizer<char_separator<char> > localTokenizer;
00287     char_separator<char> sep(" \t:`~!@#$%^&*()+={}[]\\|;:\'\",.<>/?\r");
00288     localTokenizer tok(s, sep);
00289     for (localTokenizer::iterator i = tok.begin(); i != tok.end(); ++i) {
00290       cLabels_.push_back(*i);
00291     }
00292     nLabelCols_ = cLabels_.size();
00293     return nLabelCols_ > 0;
00294   }
00295 
00296   bool ReadValues(std::istream& in) {
00297     using namespace boost;
00298     using namespace boost::spirit;
00299 
00300     std::string s;
00301     parse_info<> info;
00302     bool result = true;
00303     while (std::getline(in, s)) {
00304       // needed (for some reason) for tables that start without spaces
00305       s.insert(s.begin(), ' ');
00306       data_.resize(nRows_ + 1);
00307       std::vector<double> tempData;
00308       if (rowLabels_) {
00309         typedef tokenizer<char_separator<char> > localTokenizer;
00310         char_separator<char> sep(" \t:`~!@#$%^&*()+={}[]\\|;:\'\",.<>/?");
00311         localTokenizer tok(s, sep);
00312         localTokenizer::iterator i = tok.begin();
00313         rLabels_.push_back(*i);
00314         chset<> alnum("0-9a-zA-Z");
00315         info = parse(s.c_str(),
00316                      //
00317                      ( 
00318                       alnum >> *(ch_p('-') | ch_p('_') | alnum) >>
00319                       ch_p(':') >> 
00320                       *space_p >> 
00321                       real_p[append(tempData)] >>
00322                       *(*space_p >> real_p[append(tempData)]) 
00323                      ),
00324                      //
00325                      space_p);
00326       } else {
00327         info = parse(s.c_str(),
00328                      //
00329                      (real_p[append(tempData)] >>
00330                       *(*space_p >> real_p[append(tempData)]) ),
00331                      //
00332                      space_p);
00333       }
00334       for (unsigned i = 0; i < tempData.size(); ++i) {
00335         data_[nRows_].push_back(static_cast<Type>(tempData[i]));
00336       }
00337       result = (result && info.hit);
00338       ++nRows_;
00339     }
00340     if (result && (nCols_ == 0)) {
00341       nCols_ = data_[0].size();
00342     }
00343     return (result && (nRows_ > 0));
00344   }
00345   
00346   void PrintLabels(std::ostream& out) {
00347     typedef std::vector<std::string>::const_iterator LabelIter;
00348     LabelIter end = cLabels_.end();
00349     for (LabelIter i = cLabels_.begin(); i != end; ++i) {
00350       SetWidth(*i);
00351     }
00352     for (LabelIter i = cLabels_.begin(); i != end; ++i) {
00353       Print(out, *i);
00354     }
00355     out << std::endl;
00356   }
00357 
00358   void PrintValueRow(std::ostream& out, const unsigned row) {
00359     Util::Assert<BadRow>(!argCheck_ || ((row < nRows_) && (row >= 0)));
00360     for (unsigned col = 0; col < nCols_; ++col) {
00361       out << std::setw(width_) << data_[row][col];
00362     }
00363     out << std::endl;
00364   }
00365 
00366   template <class OutputType>
00367   void Print(std::ostream& out, const OutputType& s) {
00368     out << std::setw(width_+2) << s;
00369   }
00370 
00371   std::vector<std::string> rLabels_;
00372   std::vector<std::string> cLabels_;
00373   std::vector<std::vector<Type> > data_;
00374 
00375   unsigned width_;
00376   unsigned nRows_;
00377   unsigned nCols_;
00378   unsigned nLabelCols_;
00379 
00380   bool columnLabels_;
00381   bool rowLabels_;
00382 
00383 };
00384 
00385 std::ostream& operator<< (std::ostream& out, 
00386                           enum DataTableResult result);
00387 
00388 #endif
00389 
00390 // Local Variables: //
00391 // mode: c++ //
00392 // End: //

Generated on Tue Mar 27 16:03:38 2007 for mcmc by  doxygen 1.5.1