HIPO  4.3.0
High Performance Output data format for experimental physics
parser.cpp
Go to the documentation of this file.
1 /*
2 
3  Parser - an expression parser
4 
5  Author: Nick Gammon
6  http://www.gammon.com.au/
7 
8 (C) Copyright Nick Gammon 2004. Permission to copy, use, modify, sell and
9 distribute this software is granted provided this copyright notice appears
10 in all copies. This software is provided "as is" without express or implied
11 warranty, and with no claim as to its suitability for any purpose.
12 
13 Modified 24 October 2005 by Nick Gammon.
14 
15  1. Changed use of "abs" to "fabs"
16  2. Changed inclues from math.h and time.h to fmath and ftime
17  3. Rewrote DoMin and DoMax to inline the computation because of some problems with some libraries.
18  4. Removed "using namespace std;" and put "std::" in front of std namespace names where appropriate
19  5. Removed MAKE_STRING macro and inlined the functionality where required.
20  6. Changed Evaluate function to take its argument by reference.
21 
22 Modified 13 January 2010 by Nick Gammon.
23 
24  1. Changed getrandom to work more reliably (see page 2 of discussion thread)
25  2. Changed recognition of numbers to allow for .5 (eg. "a + .5" where there is no leading 0)
26  Also recognises -.5 (so you don't have to write -0.5)
27  3. Fixed problem where (2+3)-1 would not parse correctly (- sign directly after parentheses)
28  4. Fixed problem where changing a parameter and calling p.Evaluate again would fail because the
29  initial token type was not reset to NONE.
30 
31 Modified 16 February 2010 by Nick Gammon
32 
33  1. Fixed bug where if you called Evaluate () twice, the original expression would not be reprocessed.
34 
35 Modified 27 November 2014 by Nick Gammon
36 
37  1. Fixed bug where a literal number followed by EOF would throw an error.
38 
39 Thanks to various posters on my forum for suggestions. The relevant post is currently at:
40 
41  http://www.gammon.com.au/forum/?id=4649
42 
43 */
44 
49 #include "parser.h"
50 
51 namespace hipo {
52 
53 
54 // returns a number from 0 up to, but excluding x
55 const int getrandom (const int x)
56 {
57  if (x <= 0)
58  return 0;
59 
60  // r will be between 0 and 1 (but below 1 as we are dividing by RAND_MAX+1)
61  double r = static_cast<double> (std::rand () % RAND_MAX) / (static_cast<double> (RAND_MAX) + 1.0);
62  return floor (r * x);
63 
64 } // end of getrandom
65 
66 const int roll (const int howmany, const int die)
67 {
68  int count;
69  int total = 0;
70 
71  for (count = 0; count < howmany; ++count)
72  total += getrandom (die) + 1;
73 
74  return total;
75 
76 } // end of roll
77 
78 
79 // returns true if a x% probability exists
80 // eg. percent (80) will be true 80% of the time
81 const bool percent (const int prob)
82  {
83  if (prob <= 0)
84  return false;
85  if (prob >= 100)
86  return true;
87 
88  return getrandom (100) > (100 - prob);
89 
90  }
91 
92 static int initRandom ()
93  {
94  srand (time (NULL));
95 #ifndef WIN32
96  srand48 (time (NULL));
97 #endif
98  return 0;
99  }
100 
101 // initialise random number generator
102 static int someNumber = initRandom ();
103 
104 /*
105 
106 Expression-evaluator
107 --------------------
108 
109 Author: Nick Gammon
110 -------------------
111 
112 
113 Example usage:
114 
115  Parser p ("2 + 2 * (3 * 5) + nick");
116 
117  p.symbols_ ["nick"] = 42;
118 
119  double v = p.Evaluate ();
120 
121  double v1 = p.Evaluate ("5 + 6"); // supply new expression and evaluate it
122 
123 Syntax:
124 
125  You can use normal algebraic syntax.
126 
127  Multiply and divide has higher precedence than add and subtract.
128 
129  You can use parentheses (eg. (2 + 3) * 5 )
130 
131  Variables can be assigned, and tested. eg. a=24+a*2
132 
133  Variables can be preloaded:
134 
135  p.symbols_ ["abc"] = 42;
136  p.symbols_ ["def"] = 42;
137 
138  Afterwards they can be retrieved:
139 
140  x = p.symbols_ ["abc"];
141 
142  There are 2 predefined symbols, "pi" and "e".
143 
144  You can use the comma operator to load variables and then use them, eg.
145 
146  a=42, b=a+6
147 
148  You can use predefined functions, see below for examples of writing your own.
149 
150  42 + sqrt (64)
151 
152 
153  Comparisons
154  -----------
155 
156  Comparisons work by returning 1.0 if true, 0.0 if false.
157 
158  Thus, 2 > 3 would return 0.0
159  3 > 2 would return 1.0
160 
161  Similarly, tests for truth (eg. a && b) test whether the values are 0.0 or not.
162 
163  If test
164  -------
165 
166  There is a ternary function: if (truth-test, true-value, false-value)
167 
168  eg. if (1 < 2, 22, 33) returns 22
169 
170 
171  Precedence
172  ----------
173 
174  ( ) = - nested brackets, including function calls like sqrt (x), and assignment
175  * / - multiply, divide
176  + - - add and subtract
177  < <= > >= == != - comparisons
178  && || - AND and OR
179  , - comma operator
180 
181  Credits:
182 
183  Based in part on a simple calculator described in "The C++ Programming Language"
184  by Bjarne Stroustrup, however with considerable enhancements by me, and also based
185  on my earlier experience in writing Pascal compilers, which had a similar structure.
186 
187 */
188 
189 // functions we can call from an expression
190 
191 double DoInt (double arg)
192  {
193  return (int) arg; // drop fractional part
194  }
195 
196 double DoRandom (double arg)
197  {
198  return getrandom (static_cast <int> (arg)); // random number in range 0 to arg
199  }
200 
201 double DoPercent (double arg)
202  {
203  if (percent (static_cast <int> (arg))) // true x% of the time
204  return 1.0;
205  else
206  return 0.0;
207  }
208 
209 const double DoMin (const double arg1, const double arg2)
210  {
211  return (arg1 < arg2 ? arg1 : arg2);
212  }
213 
214 const double DoMax (const double arg1, const double arg2)
215  {
216  return (arg1 > arg2 ? arg1 : arg2);
217  }
218 
219 const double DoFmod (const double arg1, const double arg2)
220  {
221  if (arg2 == 0.0)
222  throw std::runtime_error ("Divide by zero in mod");
223 
224  return fmod (arg1, arg2);
225  }
226 
227 const double DoPow (const double arg1, const double arg2)
228  {
229  return pow (arg1, arg2);
230  }
231 
232 const double DoRoll (const double arg1, const double arg2)
233  {
234  return roll (static_cast <int> (arg1), static_cast <int> (arg2));
235  }
236 
237 const double DoIf (const double arg1, const double arg2, const double arg3)
238  {
239  if (arg1 != 0.0)
240  return arg2;
241  else
242  return arg3;
243  }
244 
245 typedef double (*OneArgFunction) (double arg);
246 typedef const double (*TwoArgFunction) (const double arg1, const double arg2);
247 typedef const double (*ThreeArgFunction) (const double arg1, const double arg2, const double arg3);
248 
249 // maps of function names to functions
250 static std::map<std::string, OneArgFunction> OneArgumentFunctions;
251 static std::map<std::string, TwoArgFunction> TwoArgumentFunctions;
252 static std::map<std::string, ThreeArgFunction> ThreeArgumentFunctions;
253 
254 // for standard library functions
255 #define STD_FUNCTION(arg) OneArgumentFunctions [#arg] = arg
256 
257 static int LoadOneArgumentFunctions ()
258  {
259  OneArgumentFunctions ["abs"] = fabs;
260  STD_FUNCTION (acos);
261  STD_FUNCTION (asin);
262  STD_FUNCTION (atan);
263 #ifndef WIN32 // doesn't seem to exist under Visual C++ 6
264  STD_FUNCTION (atanh);
265 #endif
266  STD_FUNCTION (ceil);
267  STD_FUNCTION (cos);
268  STD_FUNCTION (cosh);
269  STD_FUNCTION (exp);
270  STD_FUNCTION (exp);
271  STD_FUNCTION (floor);
272  STD_FUNCTION (log);
273  STD_FUNCTION (log10);
274  STD_FUNCTION (sin);
275  STD_FUNCTION (sinh);
276  STD_FUNCTION (sqrt);
277  STD_FUNCTION (tan);
278  STD_FUNCTION (tanh);
279 
280  OneArgumentFunctions ["int"] = DoInt;
281  OneArgumentFunctions ["rand"] = DoRandom;
282  OneArgumentFunctions ["rand"] = DoRandom;
283  OneArgumentFunctions ["percent"] = DoPercent;
284  return 0;
285  } // end of LoadOneArgumentFunctions
286 
287 static int LoadTwoArgumentFunctions ()
288  {
289  TwoArgumentFunctions ["min"] = DoMin;
290  TwoArgumentFunctions ["max"] = DoMax;
291  TwoArgumentFunctions ["mod"] = DoFmod;
292  TwoArgumentFunctions ["pow"] = DoPow; // x to the power y
293  TwoArgumentFunctions ["roll"] = DoRoll; // dice roll
294  return 0;
295  } // end of LoadTwoArgumentFunctions
296 
297 static int LoadThreeArgumentFunctions ()
298  {
299  ThreeArgumentFunctions ["if"] = DoIf;
300  return 0;
301  } // end of LoadThreeArgumentFunctions
302 
303 const Parser::TokenType Parser::GetToken (const bool ignoreSign)
304  {
305  word_.erase (0, std::string::npos);
306 
307  // skip spaces
308  while (*pWord_ && isspace (*pWord_))
309  ++pWord_;
310 
311  pWordStart_ = pWord_; // remember where word_ starts *now*
312 
313  // look out for unterminated statements and things
314  if (*pWord_ == 0 && // we have EOF
315  type_ == END) // after already detecting it
316  throw std::runtime_error ("Unexpected end of expression.");
317 
318  unsigned char cFirstCharacter = *pWord_; // first character in new word_
319 
320  if (cFirstCharacter == 0) // stop at end of file
321  {
322  word_ = "<end of expression>";
323  return type_ = END;
324  }
325 
326  unsigned char cNextCharacter = *(pWord_ + 1); // 2nd character in new word_
327 
328  // look for number
329  // can be: + or - followed by a decimal point
330  // or: + or - followed by a digit
331  // or: starting with a digit
332  // or: decimal point followed by a digit
333  if ((!ignoreSign &&
334  (cFirstCharacter == '+' || cFirstCharacter == '-') &&
335  (isdigit (cNextCharacter) || cNextCharacter == '.')
336  )
337  || isdigit (cFirstCharacter)
338  // allow decimal numbers without a leading 0. e.g. ".5"
339  // Dennis Jones 01-30-2009
340  || (cFirstCharacter == '.' && isdigit (cNextCharacter)) )
341  {
342  // skip sign for now
343  if ((cFirstCharacter == '+' || cFirstCharacter == '-'))
344  pWord_++;
345  while (isdigit (*pWord_) || *pWord_ == '.')
346  pWord_++;
347 
348  // allow for 1.53158e+15
349  if (*pWord_ == 'e' || *pWord_ == 'E')
350  {
351  pWord_++; // skip 'e'
352  if ((*pWord_ == '+' || *pWord_ == '-'))
353  pWord_++; // skip sign after e
354  while (isdigit (*pWord_)) // now digits after e
355  pWord_++;
356  }
357 
358  word_ = std::string (pWordStart_, pWord_ - pWordStart_);
359 
360  std::istringstream is (word_);
361  // parse std::string into double value
362  is >> value_;
363 
364  if (is.fail () && !is.eof ())
365  throw std::runtime_error ("Bad numeric literal: " + word_);
366  return type_ = NUMBER;
367  } // end of number found
368 
369  // special test for 2-character sequences: <= >= == !=
370  // also +=, -=, /=, *=
371  if (cNextCharacter == '=')
372  {
373  switch (cFirstCharacter)
374  {
375  // comparisons
376  case '=': type_ = EQ; break;
377  case '<': type_ = LE; break;
378  case '>': type_ = GE; break;
379  case '!': type_ = NE; break;
380  // assignments
381  case '+': type_ = ASSIGN_ADD; break;
382  case '-': type_ = ASSIGN_SUB; break;
383  case '*': type_ = ASSIGN_MUL; break;
384  case '/': type_ = ASSIGN_DIV; break;
385  // none of the above
386  default: type_ = NONE; break;
387  } // end of switch on cFirstCharacter
388 
389  if (type_ != NONE)
390  {
391  word_ = std::string (pWordStart_, 2);
392  pWord_ += 2; // skip both characters
393  return type_;
394  } // end of found one
395  } // end of *=
396 
397  switch (cFirstCharacter)
398  {
399  case '&': if (cNextCharacter == '&') // &&
400  {
401  word_ = std::string (pWordStart_, 2);
402  pWord_ += 2; // skip both characters
403  return type_ = AND;
404  }
405  break;
406  case '|': if (cNextCharacter == '|') // ||
407  {
408  word_ = std::string (pWordStart_, 2);
409  pWord_ += 2; // skip both characters
410  return type_ = OR;
411  }
412  break;
413  // single-character symboles
414  case '=':
415  case '<':
416  case '>':
417  case '+':
418  case '-':
419  case '/':
420  case '*':
421  case '(':
422  case ')':
423  case ',':
424  case '!':
425  word_ = std::string (pWordStart_, 1);
426  ++pWord_; // skip it
427  return type_ = TokenType (cFirstCharacter);
428  } // end of switch on cFirstCharacter
429 
430  if (!isalpha (cFirstCharacter))
431  {
432  if (cFirstCharacter < ' ')
433  {
434  std::ostringstream s;
435  s << "Unexpected character (decimal " << int (cFirstCharacter) << ")";
436  throw std::runtime_error (s.str ());
437  }
438  else
439  throw std::runtime_error ("Unexpected character: " + std::string (1, cFirstCharacter));
440  }
441 
442  // we have a word (starting with A-Z) - pull it out
443  while (isalnum (*pWord_) || *pWord_ == '_')
444  ++pWord_;
445 
446  word_ = std::string (pWordStart_, pWord_ - pWordStart_);
447  return type_ = NAME;
448  } // end of Parser::GetToken
449 
450 // force load of functions at static initialisation time
451 static int doLoadOneArgumentFunctions = LoadOneArgumentFunctions ();
452 static int doLoadTwoArgumentFunctions = LoadTwoArgumentFunctions ();
453 static int doLoadThreeArgumentFunctions = LoadThreeArgumentFunctions ();
454 
455 const double Parser::Primary (const bool get) // primary (base) tokens
456  {
457 
458  if (get)
459  GetToken (); // one-token lookahead
460 
461  switch (type_)
462  {
463  case NUMBER:
464  {
465  double v = value_;
466  GetToken (true); // get next one (one-token lookahead)
467  return v;
468  }
469 
470  case NAME:
471  {
472  std::string word = word_;
473  GetToken (true);
474  if (type_ == LHPAREN)
475  {
476  // might be single-argument function (eg. abs (x) )
477  std::map<std::string, OneArgFunction>::const_iterator si;
478  si = OneArgumentFunctions.find (word);
479  if (si != OneArgumentFunctions.end ())
480  {
481  double v = Expression (true); // get argument
482  CheckToken (RHPAREN);
483  GetToken (true); // get next one (one-token lookahead)
484  return si->second (v); // evaluate function
485  }
486 
487  // might be double-argument function (eg. roll (6, 2) )
488  std::map<std::string, TwoArgFunction>::const_iterator di;
489  di = TwoArgumentFunctions.find (word);
490  if (di != TwoArgumentFunctions.end ())
491  {
492  double v1 = Expression (true); // get argument 1 (not commalist)
493  CheckToken (COMMA);
494  double v2 = Expression (true); // get argument 2 (not commalist)
495  CheckToken (RHPAREN);
496  GetToken (true); // get next one (one-token lookahead)
497  return di->second (v1, v2); // evaluate function
498  }
499 
500  // might be double-argument function (eg. roll (6, 2) )
501  std::map<std::string, ThreeArgFunction>::const_iterator ti;
502  ti = ThreeArgumentFunctions.find (word);
503  if (ti != ThreeArgumentFunctions.end ())
504  {
505  double v1 = Expression (true); // get argument 1 (not commalist)
506  CheckToken (COMMA);
507  double v2 = Expression (true); // get argument 2 (not commalist)
508  CheckToken (COMMA);
509  double v3 = Expression (true); // get argument 3 (not commalist)
510  CheckToken (RHPAREN);
511  GetToken (true); // get next one (one-token lookahead)
512  return ti->second (v1, v2, v3); // evaluate function
513  }
514 
515  throw std::runtime_error ("Function '" + word + "' not implemented.");
516  }
517 
518  // not a function? must be a symbol in the symbol table
519  double & v = symbols_ [word]; // get REFERENCE to symbol table entry
520  // change table entry with expression? (eg. a = 22, or a = 22)
521  switch (type_)
522  {
523  // maybe check for NaN or Inf here (see: isinf, isnan functions)
524  case ASSIGN: v = Expression (true); break;
525  case ASSIGN_ADD: v += Expression (true); break;
526  case ASSIGN_SUB: v -= Expression (true); break;
527  case ASSIGN_MUL: v *= Expression (true); break;
528  case ASSIGN_DIV:
529  {
530  double d = Expression (true);
531  if (d == 0.0)
532  throw std::runtime_error ("Divide by zero");
533  v /= d;
534  break; // change table entry with expression
535  } // end of ASSIGN_DIV
536  default: break; // do nothing for others
537  } // end of switch on type_
538  return v; // and return new value
539  }
540 
541  case MINUS: // unary minus
542  return - Primary (true);
543 
544  case NOT: // unary not
545  return (Primary (true) == 0.0) ? 1.0 : 0.0;;
546 
547  case LHPAREN:
548  {
549  double v = CommaList (true); // inside parens, you could have commas
550  CheckToken (RHPAREN);
551  GetToken (true); // eat the )
552  return v;
553  }
554 
555  default:
556  throw std::runtime_error ("Unexpected token: " + word_);
557 
558  } // end of switch on type
559 
560  } // end of Parser::Primary
561 
562 const double Parser::Term (const bool get) // multiply and divide
563  {
564  double left = Primary (get);
565  while (true)
566  {
567  switch (type_)
568  {
569  case MULTIPLY:
570  left *= Primary (true); break;
571  case DIVIDE:
572  {
573  double d = Primary (true);
574  if (d == 0.0)
575  throw std::runtime_error ("Divide by zero");
576  left /= d;
577  break;
578  }
579  default: return left;
580  } // end of switch on type
581  } // end of loop
582  } // end of Parser::Term
583 
584 const double Parser::AddSubtract (const bool get) // add and subtract
585  {
586  double left = Term (get);
587  while (true)
588  {
589  switch (type_)
590  {
591  case PLUS: left += Term (true); break;
592  case MINUS: left -= Term (true); break;
593  default: return left;
594  } // end of switch on type
595  } // end of loop
596  } // end of Parser::AddSubtract
597 
598 const double Parser::Comparison (const bool get) // LT, GT, LE, EQ etc.
599  {
600  double left = AddSubtract (get);
601  while (true)
602  {
603  switch (type_)
604  {
605  case LT: left = left < AddSubtract (true) ? 1.0 : 0.0; break;
606  case GT: left = left > AddSubtract (true) ? 1.0 : 0.0; break;
607  case LE: left = left <= AddSubtract (true) ? 1.0 : 0.0; break;
608  case GE: left = left >= AddSubtract (true) ? 1.0 : 0.0; break;
609  case EQ: left = left == AddSubtract (true) ? 1.0 : 0.0; break;
610  case NE: left = left != AddSubtract (true) ? 1.0 : 0.0; break;
611  default: return left;
612  } // end of switch on type
613  } // end of loop
614  } // end of Parser::Comparison
615 
616 const double Parser::Expression (const bool get) // AND and OR
617  {
618  double left = Comparison (get);
619  while (true)
620  {
621  switch (type_)
622  {
623  case AND:
624  {
625  double d = Comparison (true); // don't want short-circuit evaluation
626  left = (left != 0.0) && (d != 0.0);
627  }
628  break;
629  case OR:
630  {
631  double d = Comparison (true); // don't want short-circuit evaluation
632  left = (left != 0.0) || (d != 0.0);
633  }
634  break;
635  default: return left;
636  } // end of switch on type
637  } // end of loop
638  } // end of Parser::Expression
639 
640 const double Parser::CommaList (const bool get) // expr1, expr2
641  {
642  double left = Expression (get);
643  while (true)
644  {
645  switch (type_)
646  {
647  case COMMA: left = Expression (true); break; // discard previous value
648  default: return left;
649  } // end of switch on type
650  } // end of loop
651  } // end of Parser::CommaList
652 
653 const double Parser::Evaluate () // get result
654  {
655  pWord_ = program_.c_str ();
656  type_ = NONE;
657  double v = CommaList (true);
658  if (type_ != END)
659  throw std::runtime_error ("Unexpected text at end of expression: " + std::string (pWordStart_));
660  return v;
661  }
662 
663 // change program and evaluate it
664 const double Parser::Evaluate (const std::string & program) // get result
665  {
666  program_ = program;
667  return Evaluate ();
668  }
669 
670 }
671 
std::map< std::string, double > symbols_
Symbol table mapping variable names to values. Can be accessed directly.
Definition: parser.h:122
TokenType
Token types used by the expression lexer.
Definition: parser.h:54
@ LHPAREN
Left parenthesis.
Definition: parser.h:64
@ NE
Not equal comparison (!=)
Definition: parser.h:74
@ RHPAREN
Right parenthesis.
Definition: parser.h:65
@ NOT
Logical NOT operator.
Definition: parser.h:67
@ EQ
Equal comparison (==)
Definition: parser.h:73
@ ASSIGN_DIV
Compound assignment (/=)
Definition: parser.h:81
@ DIVIDE
Division operator.
Definition: parser.h:62
@ MULTIPLY
Multiplication operator.
Definition: parser.h:61
@ NUMBER
Numeric literal.
Definition: parser.h:57
@ END
End of expression.
Definition: parser.h:58
@ OR
Logical OR (||)
Definition: parser.h:76
@ GE
Greater than or equal (>=)
Definition: parser.h:72
@ AND
Logical AND (&&)
Definition: parser.h:75
@ NAME
Identifier or variable name.
Definition: parser.h:56
@ LT
Less than comparison.
Definition: parser.h:69
@ ASSIGN_MUL
Compound assignment (*=)
Definition: parser.h:80
@ COMMA
Comma delimiter.
Definition: parser.h:66
@ ASSIGN_SUB
Compound assignment (-=)
Definition: parser.h:79
@ LE
Less than or equal (<=)
Definition: parser.h:71
@ NONE
No token (error state)
Definition: parser.h:55
@ GT
Greater than comparison.
Definition: parser.h:70
@ MINUS
Subtraction operator.
Definition: parser.h:60
@ PLUS
Addition operator.
Definition: parser.h:59
@ ASSIGN_ADD
Compound assignment (+=)
Definition: parser.h:78
@ ASSIGN
Assignment operator.
Definition: parser.h:63
const double Evaluate()
Evaluate the stored expression using current symbol values.
Definition: parser.cpp:653
Definition: bank.cpp:47
const double(* ThreeArgFunction)(const double arg1, const double arg2, const double arg3)
Definition: parser.cpp:247
const bool percent(const int prob)
Definition: parser.cpp:81
double DoInt(double arg)
Definition: parser.cpp:191
const double DoRoll(const double arg1, const double arg2)
Definition: parser.cpp:232
const double DoIf(const double arg1, const double arg2, const double arg3)
Definition: parser.cpp:237
const int getrandom(const int x)
Definition: parser.cpp:55
const int roll(const int howmany, const int die)
Definition: parser.cpp:66
double DoRandom(double arg)
Definition: parser.cpp:196
double DoPercent(double arg)
Definition: parser.cpp:201
const double DoMin(const double arg1, const double arg2)
Definition: parser.cpp:209
const double DoPow(const double arg1, const double arg2)
Definition: parser.cpp:227
double(* OneArgFunction)(double arg)
Definition: parser.cpp:245
const double DoFmod(const double arg1, const double arg2)
Definition: parser.cpp:219
const double(* TwoArgFunction)(const double arg1, const double arg2)
Definition: parser.cpp:246
const double DoMax(const double arg1, const double arg2)
Definition: parser.cpp:214
#define STD_FUNCTION(arg)
Definition: parser.cpp:255
Mathematical expression parser for arithmetic and conditional evaluation.