hist.h
Go to the documentation of this file.
1 /*
2  -------------------------------------------------------------------
3 
4  Copyright (C) 2010-2017, Andrew W. Steiner
5 
6  This file is part of O2scl.
7 
8  O2scl is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version.
12 
13  O2scl is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with O2scl. If not, see <http://www.gnu.org/licenses/>.
20 
21  -------------------------------------------------------------------
22 */
23 #ifndef O2SCL_HIST_H
24 #define O2SCL_HIST_H
25 
26 /** \file hist.h
27  \brief File defining \ref o2scl::hist
28 */
29 #include <iostream>
30 
31 #include <boost/numeric/ublas/vector.hpp>
32 
33 #include <o2scl/convert_units.h>
34 #include <o2scl/interp.h>
35 #include <o2scl/uniform_grid.h>
36 #include <o2scl/table.h>
37 
38 // Forward definition of the hist class for HDF I/O
39 namespace o2scl {
40  class hist;
41 }
42 
43 // Forward definition of HDF I/O to extend friendship in hist
44 namespace o2scl_hdf {
45  class hdf_file;
46  void hdf_input(hdf_file &hf, o2scl::hist &t, std::string name);
47  void hdf_output(hdf_file &hf, o2scl::hist &t, std::string name);
48 }
49 
50 #ifndef DOXYGEN_NO_O2NS
51 namespace o2scl {
52 #endif
53 
54  /** \brief A one-dimensional histogram class
55 
56  See discussion in the User's guide in the \ref hist_section
57  section.
58 
59  One may set the histogram bins using \ref set_bin_edges() or one
60  may manually set the limit of one bin using the reference
61  returned by get_bin_low(), get_bin_low_i(), get_bin_high(), or
62  get_bin_high_i(). Note that if one attempts to set the bins on a
63  histogram where the bins have already been set, one must ensure
64  that the new and old bin sets have the same size. This ensures
65  that there is no ambiguity in rebinning the data and also
66  prevents accidental data loss. One may set the bin edges
67  either with a generic vector, or as a \ref uniform_grid object.
68 
69  To save space, representative vectors are not allocated until
70  they are used.
71 
72  \note In order to ensure the histogram does not employ
73  user-specified representative values that are not defined, the
74  function \ref set_rep_mode() does not allow one to change the
75  mode to \ref rmode_user directly. Instead, use \ref set_reps()
76  which automatically sets the mode to \ref rmode_user and allows
77  the user to specify the representatives.
78 
79  \note If the user changes the bin edges and the histogram is in
80  mode \ref rmode_user, the bin weights will not be modified and
81  the same representative values will be assumed for the new bin
82  edges.
83 
84  \hline
85 
86  \todo Check implementation of <tt>hist::extend_lhs</tt>.
87  \todo More testing.
88 
89  \future
90  - Add a counter which counts the number of calls to update()?
91  - Add conversions back and forth from GSL histograms
92  - Create extend_lhs too?
93  - Would be nice not to have to create a new \ref
94  o2scl::search_vec object in \ref o2scl::hist::get_bin_index()
95  (make a search_vec data member?)
96  - Consider adding the analogs of the GSL histogram
97  sampling functions (separate class?)
98  - Add a function which computes the bin sizes?
99  - Allow rebinning?
100  - Add histograms of float and integer values
101  - Allow addition and other operations for two histograms.
102  - Make the interpolation functions \c const (this is a bit
103  complicated because of \ref o2scl::hist::set_reps_auto() ).
104 
105  \hline
106 
107  Internally, none of the vectors should have memory allocated for
108  them when hsize is zero, and the vector sizes should match the
109  histogram size. These and other checks are performed by \ref
110  is_valid() . Also, the function \ref set_reps_auto() should not
111  be called when mode is \ref rmode_user.
112  */
113  class hist {
114 
115  public:
116 
118 
119  protected:
120 
121  /// Bin locations (N+1)
122  ubvector ubin;
123 
124  /// Bin contents (N)
125  ubvector uwgt;
126 
127  /// Bin representative values (N)
128  ubvector urep;
129 
130  /// User-defined representative values (N)
131  ubvector user_rep;
132 
133  /// Number of bins
134  size_t hsize;
135 
136  /// Representative mode
137  size_t rmode;
138 
139  /// Interpolation type
140  size_t itype;
141 
142  /** \brief Set the representative array according to current
143  rmode (if not in user rep mode)
144  */
145  void set_reps_auto();
146 
147  /// Interpolation typedef
149 
150  /** \brief Allocate vectors for a histogram of size \c n
151 
152  This function also sets all the weights to zero.
153  */
154  void allocate(size_t n);
155 
156  public:
157 
158  /// Create an empty histogram
159  hist();
160 
161  ~hist();
162 
163  /// Copy constructor
164  hist(const hist &h);
165 
166  /// Copy constructor
167  hist &operator=(const hist &h);
168 
169  /// Create from a vectors of data
170  template<class vec_t> hist(size_t nv, const vec_t &v, size_t n_bins) {
171 
172  itype=1;
173  rmode=rmode_avg;
174  hsize=0;
175  extend_lhs=true;
176  extend_rhs=true;
177 
178  double min, max;
179  o2scl::vector_minmax_value(nv,v,min,max);
181  set_bin_edges(ug);
182 
183  for(size_t i=0;i<nv;i++) {
184  update(v[i]);
185  }
186  return;
187  }
188 
189  /// Create from a vectors of data
190  template<class vec_t, class vec2_t>
191  hist(size_t nv, const vec_t &v,
192  const vec2_t &v2, size_t n_bins) {
193 
194  itype=1;
195  rmode=rmode_avg;
196  hsize=0;
197  extend_lhs=true;
198  extend_rhs=true;
199 
200  double min, max;
201  o2scl::vector_minmax_value(nv,v,min,max);
203  set_bin_edges(ug);
204 
205  for(size_t i=0;i<nv;i++) {
206  update(v[i],v2[i]);
207  }
208  return;
209  }
210 
211  /// Create from vectors of data
212  template<class vec_t> hist(const vec_t &v, size_t n_bins) {
213  size_t nv=v.size();
214  hist(nv,v,n_bins);
215  return;
216  }
217 
218  /// Create from vectors of data
219  template<class vec_t, class vec2_t> hist
220  (const vec_t &v, const vec2_t &v2, size_t n_bins) {
221 
222  size_t nv=v.size();
223  hist(nv,v,v2,n_bins);
224  return;
225  }
226 
227  // Create a histogram from a \ref o2scl::table object
228  void from_table(o2scl::table<> &t, std::string colx,
229  size_t n_bins) {
230  *this=hist(t.get_nlines(),t.get_column(colx),n_bins);
231  return;
232  }
233 
234  // Create a histogram from a \ref o2scl::table object
235  void from_table(o2scl::table<> &t, std::string colx, std::string coly,
236  size_t n_bins) {
237  *this=hist(t.get_nlines(),t.get_column(colx),t.get_column(coly),
238  n_bins);
239  return;
240  }
241 
242  /// The histogram size
243  size_t size() const {
244  return hsize;
245  }
246 
247  /** \brief If true, allow abcissae beyond the last bin (default false)
248 
249  If this is true, the histogram will allow data with
250  corresponding to bins larger than the largest bin
251  (for increasing bin settings) or smaller than the
252  smallest bin (for decreasing bin settings).
253  */
255 
256  /** \brief If true, allow abcissae before the first bin (default false)
257  */
259 
260  /// \name Initial bin setup
261  //@{
262  /** \brief Set bins from a \ref uniform_grid object
263 
264  If the current histogram is not empty, then the
265  number of bins reported by \ref uniform_grid<>::get_nbins()
266  should be equal to the current histogram size so that the
267  number of bins is equal and we can use the same weights.
268 
269  If either the histogram is empty, or the current
270  representative mode is not \ref rmode_user, then the
271  representative mode is automatically set to \ref rmode_avg (or
272  \ref rmode_gmean if \ref uniform_grid::is_log() returns \c
273  true ) .
274  */
275  void set_bin_edges(uniform_grid<double> g);
276 
277  /** \brief Set the bins from a vector
278 
279  The parameter \c n is the size of the vector, equal to the
280  number of edges (always one more than the number of bins). If
281  the current histogram is not empty, then \c n should be equal
282  one larger to the size reported by \ref size() so that the
283  number of bins is equal and we can use the same weights.
284  */
285  template<class vec_t> void set_bin_edges(size_t n, const vec_t &v) {
286  if (n!=hsize+1) {
287  if (hsize!=0) {
288  O2SCL_ERR2("Requested binning change in non-empty ",
289  "histogram in hist::set_bin_edges().",exc_efailed);
290  }
291  allocate(n-1);
292  }
293  for(size_t i=0;i<n;i++) ubin[i]=v[i];
294  // Reset internal reps
295  if (urep.size()>0) urep.clear();
296  return;
297  }
298  //@}
299 
300  /// \name Weight functions
301  //@{
302  /// Increment bin for \c x by value \c val
303  void update(double x, double val=1.0);
304 
305  /// Increment bin with index \c i by value \c val
306  void update_i(size_t i, double val=1.0) {
307  uwgt[i]+=val;
308  return;
309  }
310 
311  /// Return contents of bin with index \c i
312  const double &get_wgt_i(size_t i) const;
313 
314  /// Return contents of bin with index \c i
315  double &get_wgt_i(size_t i);
316 
317  /// Return contents of bin for \c x
318  const double &get_wgt(double x) const {
319  return get_wgt_i(get_bin_index(x));
320  }
321 
322  /// Return contents of bin for \c x
323  double &get_wgt(double x) {
324  return get_wgt_i(get_bin_index(x));
325  }
326 
327  /// Set contents of bin with index \c i to value \c val
328  void set_wgt_i(size_t i, double val);
329 
330  /// Set contents of bin for \c x to value \c val
331  void set_wgt(double x, double val) {
332  set_wgt_i(get_bin_index(x),val);
333  return;
334  }
335 
336  /// Get a reference to the full y vector
337  const ubvector &get_wgts() const {
338  return uwgt;
339  }
340 
341  /// Get a reference to the weight for the bin at index \c i
342  const double &operator[](size_t i) const {
343  return uwgt[i];
344  }
345 
346  /// Get a reference to the weight for the bin at index \c i
347  double &operator[](size_t i) {
348  return uwgt[i];
349  }
350  //@}
351 
352  /// \name Bin manipulation
353  //@{
354  /** \brief Get the index of the bin which holds \c x
355 
356  Always returns a value between 0 and size() (inclusive)
357  */
358  size_t get_bin_index(double x) const;
359 
360  /// Get the lower edge of bin of index \c i
361  double &get_bin_low_i(size_t i);
362 
363  /// Get the lower edge of bin of index \c i
364  const double &get_bin_low_i(size_t i) const;
365 
366  /// Get the upper edge of bin of index \c i
367  double &get_bin_high_i(size_t i);
368 
369  /// Get the upper edge of bin of index \c i
370  const double &get_bin_high_i(size_t i) const;
371 
372  /// Get the lower edge of bin of index \c i
373  double &get_bin_low(double x) {
374  return get_bin_low_i(get_bin_index(x));
375  }
376 
377  /// Get the lower edge of bin of index \c i
378  const double &get_bin_low(double x) const {
379  return get_bin_low_i(get_bin_index(x));
380  }
381 
382  /// Get the upper edge of bin of index \c i
383  double &get_bin_high(double x) {
384  return get_bin_high_i(get_bin_index(x));
385  }
386 
387  /// Get the upper edge of bin of index \c i
388  const double &get_bin_high(double x) const {
389  return get_bin_high_i(get_bin_index(x));
390  }
391 
392  /// Get a reference to the full vector of bin specifications
393  const ubvector &get_bins() const {
394  return ubin;
395  }
396  //@}
397 
398  /// \name Max and min functions
399  //@{
400  /** \brief Get maximum weight
401  */
402  double get_max_wgt() const;
403 
404  /** \brief Get the bin index of the maximum weight
405  */
406  size_t get_max_index() const;
407 
408  /** \brief Get the representative for the bin with maximum weight
409  */
410  double get_max_rep();
411 
412  /** \brief Get minimum weight
413  */
414  double get_min_wgt() const;
415 
416  /** \brief Get the bin index of the minimum weight
417  */
418  size_t get_min_index() const;
419 
420  /** \brief Get the representative for the bin with minimum weight
421  */
422  double get_min_rep();
423  //@}
424 
425  /// \name Delete functions
426  //@{
427  /// Clear the data, but leave the bins as is
428  void clear_wgts();
429 
430  /// Clear the entire histogram
431  void clear();
432  //@}
433 
434  /// \name Representative modes (default is rmode_avg)
435  // Using \c rmode_avg in documentation doesn't work.
436  //@{
437  /// Average lower and upper edge
438  static const size_t rmode_avg=0;
439  /// Use user-specified representative
440  static const size_t rmode_user=1;
441  /// Use lower edge
442  static const size_t rmode_low=2;
443  /// Use upper edge
444  static const size_t rmode_high=3;
445  /// Use the geometric mean of the lower and upper edges
446  static const size_t rmode_gmean=4;
447  //@}
448 
449  /// \name Representative functions
450  //@{
451  /// Set the representative x-values for each bin
452  template<class vec_t> void set_reps(size_t n, const vec_t &v) {
453  if (n!=hsize) {
454  std::string s="Expected a vector of size "+itos(hsize)+
455  " and got a vector of size "+itos(n)+" in hist::set_reps().";
456  O2SCL_ERR(s.c_str(),exc_einval);
457  }
458  rmode=rmode_user;
459  if (user_rep.size()>0) user_rep.clear();
460  user_rep.resize(n);
461  for(size_t i=0;i<n;i++) user_rep[i]=v[i];
462  return;
463  }
464 
465  /** \brief Set mode used to compute bin representatives
466 
467  Acceptable inputs are \ref rmode_avg, \ref rmode_low,
468  \ref rmode_high, and \ref rmode_gmean .
469  */
470  void set_rep_mode(size_t mode);
471 
472  /// Get mode used to compute bin representatives
473  size_t get_rep_mode() const {
474  return rmode;
475  }
476 
477  /** \brief Return the representative of bin of index \c i
478 
479  Note that this function returns a value and not a reference.
480  This is because we can't return a reference to the internally
481  computed representatives, since they don't always exist.
482  */
483  double get_rep_i(size_t i);
484 
485  /// Return the representative of bin containing \c x
486  double get_rep(double x) {
487  return get_rep_i(get_bin_index(x));
488  }
489 
490  /** \brief Create a vector filled with the representatives for
491  each bin
492  */
493  template<class resize_vec_t> void create_rep_vec(resize_vec_t &v) {
494  v.resize(hsize);
495  for(size_t i=0;i<hsize;i++) {
496  v[i]=get_rep_i(i);
497  }
498  return;
499  }
500  //@}
501 
502  /// \name Evaluation and interpolation functions
503  //@{
504  /// Return the value of the function at \c x
505  double operator()(double x);
506 
507  /// Return the value of the function at \c x
508  double interp(double x);
509 
510  /// Return the derivative of the function at \c x
511  double deriv(double x);
512 
513  /// Return the second derivative of the function at \c x
514  double deriv2(double x);
515 
516  /// Return the integral of the function between \c x and \c y
517  double integ(double x, double y);
518 
519  /// Set the interpolation type
520  void set_interp_type(size_t interp_type);
521  //@}
522 
523  /// \name Other functions
524  //@{
525  /// Return the sum of all of the weights
526  double sum_wgts();
527 
528  /** \brief Return the integral under the histogram
529 
530  This function returns the sum of
531  \f[
532  w_i ( \mathrm{high}_i - \mathrm{low}_i) \, .
533  \f]
534  */
535  double integ_wgts();
536 
537  /** \brief This function copies all bin representative values to
538  the vector \c v, presuming that it has already been allocated
539  */
540  template<class vec_t> void copy_reps(vec_t &v) {
541 #if !O2SCL_NO_RANGE_CHECK
542  is_valid();
543 #endif
544  // If we're in user mode, just return the user value
545  if (rmode==rmode_user) {
546  for(size_t i=0;i<hsize;i++) {
547  v[i]=user_rep[i];
548  }
549  return;
550  }
551  // Check if the internal reps are not already computed
552  if (urep.size()==0) set_reps_auto();
553  // Copy the data over
554  for(size_t i=0;i<hsize;i++) {
555  v[i]=urep[i];
556  }
557  return;
558  }
559 
560  /** \brief Get the representative values for the bins
561  and store them in vector \c v using <tt>std::swap</tt> .
562 
563  This function resizes the vector \c v if necessary.
564  */
565  void swap_reps(ubvector &v);
566 
567  /** \brief Renormalize the weights to fix the integral
568 
569  This computes the integral using \ref integ() and so the
570  action of this function depends on the interpolation type.
571  If the histogram is empty, an exception is thrown.
572  */
573  void normalize(double new_sum);
574 
575  /// Internal consistency check
576  void is_valid() const;
577 
578  /** \brief Copy histogram data to a table
579 
580  First, if the table \c t has less rows than the histogram has
581  bins, then new rows are added to the table and values in the
582  new rows of the current columns are set to zero. Second, this
583  creates new columns in the table named \c reps, \c
584  lower_edges, \c upper_edges, and \c weights . Finally,
585  the histogram data is copied to the four new columns.
586  */
587  void copy_to_table(table<> &t, std::string reps, std::string lower_edges,
588  std::string upper_edges, std::string weights);
589  //@}
590 
591  // Allow HDF I/O function to access hist data
592  friend void o2scl_hdf::hdf_output(o2scl_hdf::hdf_file &hf, o2scl::hist &h,
593  std::string name);
594 
595  // Allow HDF I/O function to access hist data
597  std::string name);
598 
599  };
600 
601 #ifndef DOXYGEN_NO_O2NS
602 }
603 #endif
604 
605 #endif
Interpolation class for pre-specified vector.
Definition: interp.h:1685
interp_vec< ubvector > interp_t
Interpolation typedef.
Definition: hist.h:148
ubvector ubin
Bin locations (N+1)
Definition: hist.h:122
size_t size() const
The histogram size.
Definition: hist.h:243
size_t itype
Interpolation type.
Definition: hist.h:140
The main O<span style=&#39;position: relative; top: 0.3em; font-size: 0.8em&#39;>2</span>scl O$_2$scl names...
Definition: anneal.h:42
size_t get_rep_mode() const
Get mode used to compute bin representatives.
Definition: hist.h:473
size_t get_nlines() const
Return the number of lines.
Definition: table.h:457
Data table table class.
Definition: table.h:49
void set_wgt(double x, double val)
Set contents of bin for x to value val.
Definition: hist.h:331
invalid argument supplied by user
Definition: err_hnd.h:59
ubvector uwgt
Bin contents (N)
Definition: hist.h:125
const ubvector & get_wgts() const
Get a reference to the full y vector.
Definition: hist.h:337
bool extend_rhs
If true, allow abcissae beyond the last bin (default false)
Definition: hist.h:254
hist(const vec_t &v, size_t n_bins)
Create from vectors of data.
Definition: hist.h:212
generic failure
Definition: err_hnd.h:61
A one-dimensional histogram class.
Definition: hist.h:113
ubvector urep
Bin representative values (N)
Definition: hist.h:128
const vec_t & get_column(std::string scol) const
Returns a reference to the column named col. .
Definition: table.h:619
bool extend_lhs
If true, allow abcissae before the first bin (default false)
Definition: hist.h:258
const double & get_bin_high(double x) const
Get the upper edge of bin of index i.
Definition: hist.h:388
double get_rep(double x)
Return the representative of bin containing x.
Definition: hist.h:486
void update_i(size_t i, double val=1.0)
Increment bin with index i by value val.
Definition: hist.h:306
ubvector user_rep
User-defined representative values (N)
Definition: hist.h:131
#define O2SCL_ERR2(d, d2, n)
Set an error, two-string version.
Definition: err_hnd.h:281
hist(size_t nv, const vec_t &v, size_t n_bins)
Create from a vectors of data.
Definition: hist.h:170
const ubvector & get_bins() const
Get a reference to the full vector of bin specifications.
Definition: hist.h:393
The O<span style=&#39;position: relative; top: 0.3em; font-size: 0.8em&#39;>2</span>scl O$_2$scl namespace ...
Definition: table.h:53
void vector_minmax_value(size_t n, vec_t &data, data_t &min, data_t &max)
Compute the minimum and maximum of the first n elements of a vector.
Definition: vector.h:1229
const double & operator[](size_t i) const
Get a reference to the weight for the bin at index i.
Definition: hist.h:342
#define O2SCL_ERR(d, n)
Set an error with message d and code n.
Definition: err_hnd.h:273
double & get_wgt(double x)
Return contents of bin for x.
Definition: hist.h:323
void copy_reps(vec_t &v)
This function copies all bin representative values to the vector v, presuming that it has already bee...
Definition: hist.h:540
double & get_bin_low(double x)
Get the lower edge of bin of index i.
Definition: hist.h:373
double & get_bin_high(double x)
Get the upper edge of bin of index i.
Definition: hist.h:383
size_t rmode
Representative mode.
Definition: hist.h:137
const double & get_bin_low(double x) const
Get the lower edge of bin of index i.
Definition: hist.h:378
A class representing a uniform linear or logarithmic grid.
Definition: uniform_grid.h:38
void set_reps(size_t n, const vec_t &v)
Set the representative x-values for each bin.
Definition: hist.h:452
size_t hsize
Number of bins.
Definition: hist.h:134
void create_rep_vec(resize_vec_t &v)
Create a vector filled with the representatives for each bin.
Definition: hist.h:493
Store data in an O<span style=&#39;position: relative; top: 0.3em; font-size: 0.8em&#39;>2</span>scl O$_2$sc...
Definition: hdf_file.h:96
const double & get_wgt(double x) const
Return contents of bin for x.
Definition: hist.h:318
double & operator[](size_t i)
Get a reference to the weight for the bin at index i.
Definition: hist.h:347
std::string itos(int x)
Convert an integer to a string.
void hdf_input(hdf_file &hf, o2scl::table< vec_t > &t, std::string name)
Input a o2scl::table object from a hdf_file.
Definition: hdf_io.h:59
hist(size_t nv, const vec_t &v, const vec2_t &v2, size_t n_bins)
Create from a vectors of data.
Definition: hist.h:191
void set_bin_edges(size_t n, const vec_t &v)
Set the bins from a vector.
Definition: hist.h:285
Interpolation class for general vectors.
Definition: interp.h:1558
Linear grid with fixed number of bins and fixed endpoint.
Definition: uniform_grid.h:315

Documentation generated with Doxygen. Provided under the GNU Free Documentation License (see License Information).