pgbench: Add a real expression syntax to \set
authorRobert Haas
Mon, 2 Mar 2015 19:21:41 +0000 (14:21 -0500)
committerRobert Haas
Mon, 2 Mar 2015 19:21:41 +0000 (14:21 -0500)
Previously, you could do \set variable operand1 operator operand2, but
nothing more complicated.  Now, you can \set variable expression, which
makes it much simpler to do multi-step calculations here.  This also
adds support for the modulo operator (%), with the same semantics as in
C.

Robert Haas and Fabien Coelho, reviewed by Álvaro Herrera and
Stephen Frost

contrib/pgbench/.gitignore
contrib/pgbench/Makefile
contrib/pgbench/exprparse.y [new file with mode: 0644]
contrib/pgbench/exprscan.l [new file with mode: 0644]
contrib/pgbench/pgbench.c
contrib/pgbench/pgbench.h [new file with mode: 0644]
doc/src/sgml/pgbench.sgml
src/tools/msvc/Mkvcbuild.pm

index 489a2d62d0685e2ba31ccf2be7ff3d277d87350a..aae819ed70f9f4e7d8cdcd26ef0094b7d8d883e9 100644 (file)
@@ -1 +1,3 @@
+/exprparse.c
+/exprscan.c
 /pgbench
index b8e2fc841ec38b252e0a65cc41a55207ce5284c2..6d132228dd7848c350ef008825d33eed50bb16d9 100644 (file)
@@ -4,7 +4,9 @@ PGFILEDESC = "pgbench - a simple program for running benchmark tests"
 PGAPPICON = win32
 
 PROGRAM = pgbench
-OBJS   = pgbench.o $(WIN32RES)
+OBJS   = pgbench.o exprparse.o $(WIN32RES)
+
+EXTRA_CLEAN    = exprparse.c exprscan.c
 
 PG_CPPFLAGS = -I$(libpq_srcdir)
 PG_LIBS = $(libpq_pgport) $(PTHREAD_LIBS)
@@ -18,8 +20,21 @@ subdir = contrib/pgbench
 top_builddir = ../..
 include $(top_builddir)/src/Makefile.global
 include $(top_srcdir)/contrib/contrib-global.mk
+
+distprep: exprparse.c exprscan.c
 endif
 
 ifneq ($(PORTNAME), win32)
 override CFLAGS += $(PTHREAD_CFLAGS)
 endif
+
+# There is no correct way to write a rule that generates two files.
+# Rules with two targets don't have that meaning, they are merely
+# shorthand for two otherwise separate rules.  To be safe for parallel
+# make, we must chain the dependencies like this.  The semicolon is
+# important; otherwise, make will choose the built-in rule.
+
+exprparse.h: exprparse.c ;
+
+# exprscan is compiled as part of exprparse
+exprparse.o: exprscan.c
diff --git a/contrib/pgbench/exprparse.y b/contrib/pgbench/exprparse.y
new file mode 100644 (file)
index 0000000..243c6b9
--- /dev/null
@@ -0,0 +1,96 @@
+%{
+/*-------------------------------------------------------------------------
+ *
+ * exprparse.y
+ *   bison grammar for a simple expression syntax
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include "pgbench.h"
+
+PgBenchExpr *expr_parse_result;
+
+static PgBenchExpr *make_integer_constant(int64 ival);
+static PgBenchExpr *make_variable(char *varname);
+static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr,
+       PgBenchExpr *rexpr);
+
+%}
+
+%expect 0
+%name-prefix="expr_yy"
+
+/*
+ * Semantic values: ival carries INTEGER tokens, str carries the name of a
+ * VARIABLE token, and expr carries the tree built for the expr nonterminal.
+ */
+%union
+{
+   int64       ival;
+   char       *str;
+   PgBenchExpr *expr;
+}
+
+/*
+ * Each symbol whose value is used in an action needs its %union member
+ * declared here, otherwise $$ and $N have no type.
+ */
+%type <expr> expr
+%type <ival> INTEGER
+%type <str> VARIABLE
+%token INTEGER VARIABLE
+%token CHAR_ERROR /* never used, will raise a syntax error */
+
+/* Operator precedence, lowest first; UMINUS is used via %prec for unary +/- */
+%left  '+' '-'
+%left  '*' '/' '%'
+%right UMINUS
+
+%%
+
+/* Start symbol: hand the finished tree to the caller via expr_parse_result. */
+result: expr               { expr_parse_result = $1; }
+
+/*
+ * Expressions.  Parentheses and unary plus simply pass the inner tree
+ * through; unary minus is represented as the binary operation (0 - expr).
+ */
+expr: '(' expr ')'         { $$ = $2; }
+   | '+' expr %prec UMINUS { $$ = $2; }
+   | '-' expr %prec UMINUS { $$ = make_op('-', make_integer_constant(0), $2); }
+   | expr '+' expr         { $$ = make_op('+', $1, $3); }
+   | expr '-' expr         { $$ = make_op('-', $1, $3); }
+   | expr '*' expr         { $$ = make_op('*', $1, $3); }
+   | expr '/' expr         { $$ = make_op('/', $1, $3); }
+   | expr '%' expr         { $$ = make_op('%', $1, $3); }
+   | INTEGER               { $$ = make_integer_constant($1); }
+   | VARIABLE              { $$ = make_variable($1); }
+   ;
+
+%%
+
+/*
+ * Allocate a leaf node that holds the literal integer value ival.
+ */
+static PgBenchExpr *
+make_integer_constant(int64 ival)
+{
+   PgBenchExpr *node;
+
+   node = pg_malloc(sizeof(*node));
+   node->u.integer_constant.ival = ival;
+   node->etype = ENODE_INTEGER_CONSTANT;
+
+   return node;
+}
+
+/*
+ * Allocate a leaf node that refers to the variable called varname.
+ * The string pointer is stored as-is, not copied.
+ */
+static PgBenchExpr *
+make_variable(char *varname)
+{
+   PgBenchExpr *node;
+
+   node = pg_malloc(sizeof(*node));
+   node->u.variable.varname = varname;
+   node->etype = ENODE_VARIABLE;
+
+   return node;
+}
+
+/*
+ * Allocate an interior node applying the binary operator to the two
+ * already-built subtrees lexpr (left operand) and rexpr (right operand).
+ */
+static PgBenchExpr *
+make_op(char operator, PgBenchExpr *lexpr, PgBenchExpr *rexpr)
+{
+   PgBenchExpr *node;
+
+   node = pg_malloc(sizeof(*node));
+   node->u.operator.rexpr = rexpr;
+   node->u.operator.lexpr = lexpr;
+   node->u.operator.operator = operator;
+   node->etype = ENODE_OPERATOR;
+
+   return node;
+}
+
+#include "exprscan.c"
diff --git a/contrib/pgbench/exprscan.l b/contrib/pgbench/exprscan.l
new file mode 100644 (file)
index 0000000..4c9229c
--- /dev/null
@@ -0,0 +1,105 @@
+%{
+/*-------------------------------------------------------------------------
+ *
+ * exprscan.l
+ *   a lexical scanner for a simple expression syntax
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* line and column number for error reporting */
+static int yyline = 0, yycol = 0;
+
+/* Handles to the buffer that the lexer uses internally */
+static YY_BUFFER_STATE scanbufhandle;
+static char *scanbuf;
+static int scanbuflen;
+
+/* flex 2.5.4 doesn't bother with a decl for this */
+int expr_yylex(void);
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option warn
+%option prefix="expr_yy"
+
+non_newline        [^\n\r]
+space          [ \t\r\f]
+
+%%
+
+   /*
+    * Every rule adds the length of the matched text to yycol so that
+    * yyerror() can report the column at which parsing stopped.
+    */
+
+"+"                { yycol += yyleng; return '+'; }
+"-"                { yycol += yyleng; return '-'; }
+"*"                { yycol += yyleng; return '*'; }
+"/"                { yycol += yyleng; return '/'; }
+"%"                { yycol += yyleng; return '%'; }
+"("                { yycol += yyleng; return '('; }
+")"                { yycol += yyleng; return ')'; }
+:[a-zA-Z0-9_]+ { /* the token value is a copy of the name without the leading ':' */ yycol += yyleng; yylval.str = pg_strdup(yytext + 1); return VARIABLE; }
+[0-9]+         { /* digits are converted with pgbench's own strtoint64() */ yycol += yyleng; yylval.ival = strtoint64(yytext); return INTEGER; }
+
+[\n]           { /* a newline starts a new line: reset the column */ yycol = 0; yyline++; }
+{space}            { yycol += yyleng; /* ignore */ }
+
+.              {
+                   /*
+                    * Anything else: complain, then return a token that no
+                    * grammar rule accepts so the parser reports an error.
+                    */
+                   yycol += yyleng;
+                   fprintf(stderr, "unexpected character '%s'\n", yytext);
+                   return CHAR_ERROR;
+               }
+%%
+
+/*
+ * Error callback for the parser: print the message and the current column.
+ *
+ * pgbench runs the parser on one script line at a time, so the line counter
+ * carries no useful information here and only the column is reported.  This
+ * function deliberately does not exit; it returns so that pgbench can raise
+ * the error itself with more information.
+ */
+void
+yyerror(const char *message)
+{
+   fprintf(stderr, "%s at column %d\n", message, yycol);
+}
+
+/*
+ * Called before any actual parsing is done
+ *
+ * Copies str into a newly allocated buffer with the double terminator that
+ * flex requires, makes that buffer the lexer's input, and resets the
+ * position counters used for error reporting.  The buffer is released by
+ * expr_scanner_finish().
+ */
+void
+expr_scanner_init(const char *str)
+{
+   Size    slen = strlen(str);
+
+   /*
+    * Might be left over after error
+    */
+   if (YY_CURRENT_BUFFER)
+       yy_delete_buffer(YY_CURRENT_BUFFER);
+
+   /*
+    * The counters are file-level statics; without this reset the column
+    * reported by yyerror() would keep growing across every expression
+    * scanned so far instead of being relative to this one.
+    */
+   yyline = 0;
+   yycol = 0;
+
+   /*
+    * Make a scan buffer with special termination needed by flex.
+    */
+   scanbuflen = slen;
+   scanbuf = pg_malloc(slen + 2);
+   memcpy(scanbuf, str, slen);
+   scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
+   scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
+
+   BEGIN(INITIAL);
+}
+
+
+/*
+ * Called after parsing is done to clean up after expr_scanner_init():
+ * releases the flex buffer handle and the scan buffer it was built on.
+ */
+void
+expr_scanner_finish(void)
+{
+   yy_delete_buffer(scanbufhandle);
+   pg_free(scanbuf);
+}
index ddd11a09c5b1762b59a91f2c2b5be54dc9a903ed..706fdf5b197bb328ae829dc3639409ac2e42e0b9 100644 (file)
@@ -57,6 +57,8 @@
 #define M_PI 3.14159265358979323846
 #endif
 
+#include "pgbench.h"
+
 /*
  * Multi-platform pthread implementations
  */
@@ -289,6 +291,7 @@ typedef struct
    int         type;           /* command type (SQL_COMMAND or META_COMMAND) */
    int         argc;           /* number of command words */
    char       *argv[MAX_ARGS]; /* command word list */
+   PgBenchExpr *expr;          /* parsed expression */
 } Command;
 
 typedef struct
@@ -423,7 +426,7 @@ usage(void)
  * This function is a modified version of scanint8() from
  * src/backend/utils/adt/int8.c.
  */
-static int64
+int64
 strtoint64(const char *str)
 {
    const char *ptr = str;
@@ -879,6 +882,91 @@ getQueryParams(CState *st, const Command *command, const char **params)
        params[i] = getVariable(st, command->argv[i + 1]);
 }
 
+/*
+ * Recursive evaluation of an expression in a pgbench script
+ * using the current state of variables.
+ * Returns whether the evaluation was ok,
+ * the value itself is returned through the retval pointer.
+ *
+ * On failure a message has already been printed to stderr and the caller
+ * must not use *retval.  Failures are: reference to an undefined variable,
+ * division or modulo by zero, a quotient that does not fit in int64, and a
+ * malformed expression node.
+ */
+static bool
+evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
+{
+   switch (expr->etype)
+   {
+       case ENODE_INTEGER_CONSTANT:
+           {
+               *retval = expr->u.integer_constant.ival;
+               return true;
+           }
+
+       case ENODE_VARIABLE:
+           {
+               char       *var;
+
+               if ((var = getVariable(st, expr->u.variable.varname)) == NULL)
+               {
+                   fprintf(stderr, "undefined variable %s\n",
+                       expr->u.variable.varname);
+                   return false;
+               }
+               *retval = strtoint64(var);
+               return true;
+           }
+
+       case ENODE_OPERATOR:
+           {
+               /* smallest int64, spelled so the constant itself cannot overflow */
+               const int64 int64_min = -INT64CONST(0x7FFFFFFFFFFFFFFF) - 1;
+               int64   lval;
+               int64   rval;
+
+               /* evaluate both operands first; either one may fail */
+               if (!evaluateExpr(st, expr->u.operator.lexpr, &lval))
+                   return false;
+               if (!evaluateExpr(st, expr->u.operator.rexpr, &rval))
+                   return false;
+               switch (expr->u.operator.operator)
+               {
+                   case '+':
+                       *retval = lval + rval;
+                       return true;
+
+                   case '-':
+                       *retval = lval - rval;
+                       return true;
+
+                   case '*':
+                       *retval = lval * rval;
+                       return true;
+
+                   case '/':
+                       if (rval == 0)
+                       {
+                           fprintf(stderr, "division by zero\n");
+                           return false;
+                       }
+
+                       /*
+                        * The only overflowing quotient: -INT64_MIN is not
+                        * representable, and the division traps on some
+                        * platforms.
+                        */
+                       if (rval == -1 && lval == int64_min)
+                       {
+                           fprintf(stderr, "bigint out of range\n");
+                           return false;
+                       }
+                       *retval = lval / rval;
+                       return true;
+
+                   case '%':
+                       if (rval == 0)
+                       {
+                           fprintf(stderr, "division by zero\n");
+                           return false;
+                       }
+
+                       /*
+                        * Anything modulo -1 is 0; answer directly so that
+                        * INT64_MIN % -1 never reaches the hardware divide.
+                        */
+                       if (rval == -1)
+                       {
+                           *retval = 0;
+                           return true;
+                       }
+                       *retval = lval % rval;
+                       return true;
+               }
+
+               /* operator character not produced by the grammar */
+               fprintf(stderr, "bad operator\n");
+               return false;
+           }
+
+       default:
+           break;
+   }
+
+   /* unknown node type */
+   fprintf(stderr, "bad expression\n");
+   return false;
+}
+
 /*
  * Run a shell command. The result is assigned to the variable if not NULL.
  * Return true if succeeded, or false on error.
@@ -1515,64 +1603,16 @@ top:
        }
        else if (pg_strcasecmp(argv[0], "set") == 0)
        {
-           char       *var;
-           int64       ope1,
-                       ope2;
            char        res[64];
+           PgBenchExpr *expr = commands[st->state]->expr;
+           int64       result;
 
-           if (*argv[2] == ':')
+           if (!evaluateExpr(st, expr, &result))
            {
-               if ((var = getVariable(st, argv[2] + 1)) == NULL)
-               {
-                   fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[2]);
-                   st->ecnt++;
-                   return true;
-               }
-               ope1 = strtoint64(var);
-           }
-           else
-               ope1 = strtoint64(argv[2]);
-
-           if (argc < 5)
-               snprintf(res, sizeof(res), INT64_FORMAT, ope1);
-           else
-           {
-               if (*argv[4] == ':')
-               {
-                   if ((var = getVariable(st, argv[4] + 1)) == NULL)
-                   {
-                       fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[4]);
-                       st->ecnt++;
-                       return true;
-                   }
-                   ope2 = strtoint64(var);
-               }
-               else
-                   ope2 = strtoint64(argv[4]);
-
-               if (strcmp(argv[3], "+") == 0)
-                   snprintf(res, sizeof(res), INT64_FORMAT, ope1 + ope2);
-               else if (strcmp(argv[3], "-") == 0)
-                   snprintf(res, sizeof(res), INT64_FORMAT, ope1 - ope2);
-               else if (strcmp(argv[3], "*") == 0)
-                   snprintf(res, sizeof(res), INT64_FORMAT, ope1 * ope2);
-               else if (strcmp(argv[3], "/") == 0)
-               {
-                   if (ope2 == 0)
-                   {
-                       fprintf(stderr, "%s: division by zero\n", argv[0]);
-                       st->ecnt++;
-                       return true;
-                   }
-                   snprintf(res, sizeof(res), INT64_FORMAT, ope1 / ope2);
-               }
-               else
-               {
-                   fprintf(stderr, "%s: unsupported operator %s\n", argv[0], argv[3]);
-                   st->ecnt++;
-                   return true;
-               }
+               st->ecnt++;
+               return true;
            }
+           sprintf(res, INT64_FORMAT, result);
 
            if (!putVariable(st, argv[0], argv[1], res))
            {
@@ -2151,7 +2191,7 @@ parseQuery(Command *cmd, const char *raw_sql)
 
 /* Parse a command; return a Command struct, or NULL if it's a comment */
 static Command *
-process_commands(char *buf)
+process_commands(char *buf, const char *source, const int lineno)
 {
    const char  delim[] = " \f\n\r\t\v";
 
@@ -2182,16 +2222,23 @@ process_commands(char *buf)
 
    if (*p == '\\')
    {
+       int     max_args = -1;
        my_commands->type = META_COMMAND;
 
        j = 0;
        tok = strtok(++p, delim);
 
+       if (tok != NULL && pg_strcasecmp(tok, "set") == 0)
+           max_args = 2;
+
        while (tok != NULL)
        {
            my_commands->argv[j++] = pg_strdup(tok);
            my_commands->argc++;
-           tok = strtok(NULL, delim);
+           if (max_args >= 0 && my_commands->argc >= max_args)
+               tok = strtok(NULL, "");
+           else
+               tok = strtok(NULL, delim);
        }
 
        if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0)
@@ -2250,9 +2297,17 @@ process_commands(char *buf)
                exit(1);
            }
 
-           for (j = my_commands->argc < 5 ? 3 : 5; j < my_commands->argc; j++)
-               fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
-                       my_commands->argv[0], my_commands->argv[j]);
+           expr_scanner_init(my_commands->argv[2]);
+
+           if (expr_yyparse() != 0)
+           {
+               fprintf(stderr, "%s: parse error\n", my_commands->argv[0]);
+               exit(1);
+           }
+
+           my_commands->expr = expr_parse_result;
+
+           expr_scanner_finish();
        }
        else if (pg_strcasecmp(my_commands->argv[0], "sleep") == 0)
        {
@@ -2393,7 +2448,7 @@ process_file(char *filename)
 
    Command   **my_commands;
    FILE       *fd;
-   int         lineno;
+   int         lineno, index;
    char       *buf;
    int         alloc_num;
 
@@ -2416,22 +2471,24 @@ process_file(char *filename)
    }
 
    lineno = 0;
+   index = 0;
 
    while ((buf = read_line_from_file(fd)) != NULL)
    {
        Command    *command;
+       lineno += 1;
 
-       command = process_commands(buf);
+       command = process_commands(buf, filename, lineno);
 
        free(buf);
 
        if (command == NULL)
            continue;
 
-       my_commands[lineno] = command;
-       lineno++;
+       my_commands[index] = command;
+       index++;
 
-       if (lineno >= alloc_num)
+       if (index >= alloc_num)
        {
            alloc_num += COMMANDS_ALLOC_NUM;
            my_commands = pg_realloc(my_commands, sizeof(Command *) * alloc_num);
@@ -2439,7 +2496,7 @@ process_file(char *filename)
    }
    fclose(fd);
 
-   my_commands[lineno] = NULL;
+   my_commands[index] = NULL;
 
    sql_files[num_files++] = my_commands;
 
@@ -2447,12 +2504,12 @@ process_file(char *filename)
 }
 
 static Command **
-process_builtin(char *tb)
+process_builtin(char *tb, const char *source)
 {
 #define COMMANDS_ALLOC_NUM 128
 
    Command   **my_commands;
-   int         lineno;
+   int         lineno, index;
    char        buf[BUFSIZ];
    int         alloc_num;
 
@@ -2460,6 +2517,7 @@ process_builtin(char *tb)
    my_commands = (Command **) pg_malloc(sizeof(Command *) * alloc_num);
 
    lineno = 0;
+   index = 0;
 
    for (;;)
    {
@@ -2478,21 +2536,23 @@ process_builtin(char *tb)
 
        *p = '\0';
 
-       command = process_commands(buf);
+       lineno += 1;
+
+       command = process_commands(buf, source, lineno);
        if (command == NULL)
            continue;
 
-       my_commands[lineno] = command;
-       lineno++;
+       my_commands[index] = command;
+       index++;
 
-       if (lineno >= alloc_num)
+       if (index >= alloc_num)
        {
            alloc_num += COMMANDS_ALLOC_NUM;
            my_commands = pg_realloc(my_commands, sizeof(Command *) * alloc_num);
        }
    }
 
-   my_commands[lineno] = NULL;
+   my_commands[index] = NULL;
 
    return my_commands;
 }
@@ -3222,17 +3282,20 @@ main(int argc, char **argv)
    switch (ttype)
    {
        case 0:
-           sql_files[0] = process_builtin(tpc_b);
+           sql_files[0] = process_builtin(tpc_b,
+                                          "");
            num_files = 1;
            break;
 
        case 1:
-           sql_files[0] = process_builtin(select_only);
+           sql_files[0] = process_builtin(select_only,
+                                          "");
            num_files = 1;
            break;
 
        case 2:
-           sql_files[0] = process_builtin(simple_update);
+           sql_files[0] = process_builtin(simple_update,
+                                          "");
            num_files = 1;
            break;
 
diff --git a/contrib/pgbench/pgbench.h b/contrib/pgbench/pgbench.h
new file mode 100644 (file)
index 0000000..128bf11
--- /dev/null
@@ -0,0 +1,56 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgbench.h
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef PGBENCH_H
+#define PGBENCH_H
+
+/* Kind of node in a parsed \set expression tree */
+typedef enum PgBenchExprType
+{
+   ENODE_INTEGER_CONSTANT,     /* literal integer */
+   ENODE_VARIABLE,             /* reference to a pgbench variable */
+   ENODE_OPERATOR              /* binary operator with two operands */
+} PgBenchExprType;
+
+struct PgBenchExpr;
+typedef struct PgBenchExpr PgBenchExpr;
+
+/*
+ * One node of an expression tree.  etype tells which member of the union
+ * u is in use.
+ */
+struct PgBenchExpr
+{
+   PgBenchExprType etype;
+   union
+   {
+       /* ENODE_INTEGER_CONSTANT */
+       struct
+       {
+           int64 ival;             /* the constant's value */
+       } integer_constant;
+       /* ENODE_VARIABLE */
+       struct
+       {
+           char *varname;          /* variable name, without the leading ':' */
+       } variable;
+       /* ENODE_OPERATOR */
+       struct
+       {
+           char operator;          /* operator character: + - * / % */
+           PgBenchExpr *lexpr;     /* left operand */
+           PgBenchExpr *rexpr;     /* right operand */
+       } operator;
+   } u;
+};
+
+extern PgBenchExpr *expr_parse_result;
+
+extern int      expr_yyparse(void);
+extern int      expr_yylex(void);
+extern void expr_yyerror(const char *str);
+extern void expr_scanner_init(const char *str);
+extern void expr_scanner_finish(void);
+
+extern int64 strtoint64(const char *str);
+
+#endif
index 7d203cda84c7740197d97510dbaa81700808e737..16b82a3bbe04337858b7df7020956091cc93c3a1 100644 (file)
@@ -751,22 +751,25 @@ pgbench  options  dbname
   
    
     
-     \set varname operand1 [ operator operand2 ]
+     \set varname expression
     
 
     
      
-      Sets variable varname to a calculated integer value.
-      Each operand is either an integer constant or a
-      :variablename reference to a variable
-      having an integer value.  The operator can be
-      +, -, *, or /.
+      Sets variable varname to an integer value calculated
+      from expression.
+      The expression may contain integer constants such as 5432,
+      references to variables :variablename,
+      and expressions composed of unary (-) or binary operators
+      (+, -, *, /, %)
+      with their usual associativity, and parentheses.
      
 
      
-      Example:
+      Examples:
 
 \set ntellers 10 * :scale
+\set aid (1021 * :aid) % (100000 * :scale) + 1
 
     
    
index dba9b631688457a821eeef24cf90d7ffbaac2c3e..5dc8426b9351dacc0610d7f7d41c87e1e2d2ad4b 100644 (file)
@@ -49,6 +49,7 @@ my $contrib_extraincludes =
   { 'tsearch2' => ['contrib/tsearch2'], 'dblink' => ['src/backend'] };
 my $contrib_extrasource = {
    'cube' => [ 'cubescan.l', 'cubeparse.y' ],
+   'pgbench' => [ 'exprscan.l', 'exprparse.y' ],
    'seg'  => [ 'segscan.l',  'segparse.y' ], };
 my @contrib_excludes = ('pgcrypto', 'intagg', 'sepgsql');