/*
   File: textparsing.c
   Defines the routines to parse terminals and terminal sets
   for recursive backup parsing.
*/

/* global includes */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>

/* local includes */
#include <export.h>
#include <error.h>
#include <memalloc.h>
#include <textstorage.h>
#include <ds.h>
#include <trace.h>
#include <textparsing.h>

/* current capacity in bytes of parsebuffer; raised by check_parsebuffer_length */
public int parsebuffer_size = 100000;
/* number of bytes requested for strstore in init_textparsing */
public int strstore_size = 100000;
/* input text being parsed; the copy routines terminate it with EOFCHAR */
public char *parsebuffer;
/* current read position inside parsebuffer */
public char *iptr;
/* column offset that currentcolumn adds while still on the first buffer line */
private int startcol;
/* number of parses counted by increment_nrparses since the last reset */
public int nrofparses;
/* scratch string filled in by the set parsing routines */
public char *strstore;

/*
   Make sure the parse buffer has room for len characters plus one
   terminating character. When it has not, the old buffer is released and
   a new one of len + 1 bytes is allocated; the old contents are not kept.
   NOTE(review): pointers into the old buffer (iptr, eptr) are not
   adjusted here - callers presumably reset them; confirm.
*/
private void check_parsebuffer_length (int len)
{
	if (len >= parsebuffer_size) {
		free (parsebuffer);
		parsebuffer_size = len + 1;
		parsebuffer = (char *) ckmalloc (parsebuffer_size);
		wlog ("Dynamically adapting size of parse buffer");
	}
}

/*
   Load at most len characters of src into the parse buffer, store
   EOFCHAR at position len and reset the parse state: the read pointer
   returns to the start of the buffer, the parse count drops to zero and
   col becomes the starting column.
*/
public void copy_into_parsebuffer (char *src, int len, int col)
{
	check_parsebuffer_length (len);
	strncpy (parsebuffer, src, len);
	parsebuffer[len] = EOFCHAR;

	nrofparses = 0;
	iptr = parsebuffer;
	startcol = col;
}

public void copy_file_into_parsebuffer (FILE *fd)
	{ struct stat buf;
	  int fdnr = fileno (fd);
	  int len = 0;
	  char *bufptr = parsebuffer;
	  char c;

	  if (fstat (fdnr, &buf) == -1) panic ("Could not stat input file\n");
	  if ((buf.st_mode & S_IFMT) == S_IFREG)
	     check_parsebuffer_length ((int) buf.st_size);
	  while ((c = fgetc (fd)) != EOF)
	     { *bufptr++ = c;
	       len++;
	       if (len == parsebuffer_size)
		  panic ("too much input from input file\n");
	     };
	  *bufptr = EOFCHAR;
	  iptr = parsebuffer;
	  nrofparses = 0;
	};

/* size in bytes of both error message buffers below */
#define EMSGSIZE 100
public char pemsg[EMSGSIZE];			/* probable error message */
/* message of the most recent mismatch; synerror copies it into pemsg */
private char emsg[EMSGSIZE];
public char *eptr;				/* farthest point reached */
/*
   Record a syntax error. Only when the read pointer lies beyond the
   farthest point noted so far is that point moved up and the pending
   message in emsg promoted to the probable error message.
*/
private void synerror ()
{
	if (iptr - eptr <= 0) return;
	eptr = iptr;
	strcpy (pemsg, emsg);	/* keep the message that belongs to the farthest point */
}

/*
   Note that a lookahead failed in the rule called name. As in synerror,
   the farthest point and the probable error message are only replaced
   when the read pointer lies beyond the farthest point seen so far.
*/
public void lookahead_failure (char *name)
{
	if (iptr - eptr > 0) {
		eptr = iptr;
		/* pemsg has a fixed size while rule names can be of any length:
		   bound the write, truncating the message if need be */
		snprintf (pemsg, sizeof (pemsg), "lookahead failed in rule %s", name);
	}
}

/*
   Make strstore the empty string for the duration of the callq () call:
   its first character is replaced by a terminator beforehand and put
   back afterwards. The routine pushes itself again with pushq before
   returning.
*/
public void make_empty_strstore ()
{
	char first = strstore[0];

	strstore[0] = '\0';
	callq ();
	strstore[0] = first;
	pushq (make_empty_strstore);
}

/*
   End of sentence: callq () is invoked only when the read pointer is at
   the EOFCHAR terminator; otherwise a syntax error is recorded. In both
   cases the routine pushes itself again with pushq before returning.
*/
public void endofsentence ()
{
	if (*iptr == EOFCHAR) {
		callq ();
	} else {
		sprintf (emsg, "end of parse expected");
		synerror ();
	}
	pushq (endofsentence);
}

/*
   Count one more parse, log it when tracing is on, invoke callq () and
   push this routine again with pushq.
*/
public void increment_nrparses ()
{
	nrofparses += 1;
	if (tracing) {
		wlog ("A parse was found");
	}
	callq ();
	pushq (increment_nrparses);
}

/*
   returns the current column
*/
public int currentcolumn ()
	{ char *tiptr = iptr;
	  int col = -1;
	  while (1)
	     { col++;
	       if (tiptr == parsebuffer)
		  { col += startcol;
		    break;
	          };
	       tiptr--;
	       if (*tiptr == '\n') break;
	     };
	  return (col);
	};

/*
   return the current row
*/
public int currentrow ()
	{ char *tiptr = parsebuffer;
	  int row = 0;
	  while (tiptr != iptr)
	     { if (*tiptr == '\n') row++;
	       tiptr++;
	     };
	  return (row);
	};

/*
   Commit cut: the int pointer obtained from popip () has 1 stored
   through it before callq () is invoked (presumably the cut flag of the
   enclosing rule - confirm against the generated parser). Afterwards the
   pointer and this routine are pushed back.
*/
public void cut ()
{
	int *flag = popip ();

	*flag = 1;
	callq ();
	puship (flag);
	pushq (cut);
}

/*
   Parse a terminal. The terminal string read with tops () is compared
   character by character with the input at the read pointer. On a
   mismatch a syntax error is recorded and only this routine is pushed
   back. On a match the terminal is popped, the read pointer is moved
   past it for the duration of callq (), and afterwards the read pointer,
   the terminal and this routine are restored.
   match_count counts every attempt, successful or not.
*/
private int match_count;
public void parse_terminal ()
{
	char *terminal = tops ();
	char *riptr = iptr;
	register char *tptr, *sptr;

	match_count++;
	for (tptr = terminal, sptr = riptr; *tptr; tptr++, sptr++)
		if (*tptr != *sptr) {
			/* emsg has a fixed size while terminals can be of any
			   length: bound the write instead of overflowing */
			snprintf (emsg, sizeof (emsg), "'%s' expected", terminal);
			synerror ();
			pushq (parse_terminal);
			return;
		}
	pop (1);
	iptr = sptr;
	callq ();
	iptr = riptr;
	pushs (terminal);
	pushq (parse_terminal);
}

/*
   Return 1 when the character ptr points at occurs in the string set,
   0 otherwise. A NUL character is never considered a member.
*/
public int in_set (char *ptr, char *set)
{
	/* strchr alone would accept NUL by matching the terminator of set */
	if (*ptr == '\0') return (0);
	return (strchr (set, *ptr) != NULL);
}

/*
   Lookahead test that consumes no input: callq () is invoked only when
   the character at the read pointer is NOT in the set read with tops ().
   The set is popped around that call and pushed back afterwards; this
   routine is pushed again in either case.
*/
public void fail_if_iptr_at_set ()
{
	char *set = tops ();

	if (in_set (iptr, set)) {
		pushq (fail_if_iptr_at_set);
		return;
	}
	pop (1);
	callq ();
	pushs (set);
	pushq (fail_if_iptr_at_set);
}

/*
   Lookahead test that consumes no input: callq () is invoked only when
   the character at the read pointer IS in the set read with tops ().
   The set is popped around that call and pushed back afterwards; this
   routine is pushed again in either case.
*/
public void fail_if_iptr_not_at_set ()
{
	char *set = tops ();

	if (!in_set (iptr, set)) {
		pushq (fail_if_iptr_not_at_set);
		return;
	}
	pop (1);
	callq ();
	pushs (set);
	pushq (fail_if_iptr_not_at_set);
}

/*
   Parse exactly one character from the set read with tops (). On failure
   a syntax error is recorded and only this routine is pushed back. On
   success the character is stored in strstore as a one character string,
   the read pointer is advanced and the set popped for the duration of
   callq (); afterwards the read pointer, the set and this routine are
   restored.
*/
public void parse_set ()
{
	char *set = tops ();
	char *riptr = iptr;

	if (!in_set (iptr, set)) {
		/* bounded write: a long set must not overflow the fixed size emsg */
		snprintf (emsg, sizeof (emsg), "character in '%s' expected", set);
		synerror ();
		pushq (parse_set);
		return;
	}
	strstore[0] = *iptr;
	strstore[1] = '\0';
	iptr++;
	pop (1);
	callq ();
	iptr = riptr;
	pushs (set);
	pushq (parse_set);
}

/*
   Parse exactly one character that is not in the set read with tops ()
   and is not the EOFCHAR terminator. On failure a syntax error is
   recorded and only this routine is pushed back. On success the
   character is stored in strstore as a one character string, the read
   pointer is advanced and the set popped for the duration of callq ();
   afterwards the read pointer, the set and this routine are restored.
*/
public void parse_non_set ()
{
	char *set = tops ();
	char *riptr = iptr;

	if (in_set (iptr, set) || (*iptr == EOFCHAR)) {
		/* bounded write: a long set must not overflow the fixed size emsg */
		snprintf (emsg, sizeof (emsg), "character not in '%s' expected", set);
		synerror ();
		pushq (parse_non_set);
		return;
	}
	strstore[0] = *iptr;
	strstore[1] = '\0';
	iptr++;
	pop (1);
	callq ();
	iptr = riptr;
	pushs (set);
	pushq (parse_non_set);
}

public void parse_set_star ()
	{ char *set = pops ();
	  char *riptr = iptr;
	  char *tiptr = iptr;
	  int len = 0;
	  int count;

	  while (in_set (tiptr, set)) { tiptr++; len++; };
	  for (count = 0; count <= len; count++)
	     { strncpy (strstore, riptr, count);
	       strstore[count] = '\0';
	       iptr = riptr+count;
	       callq ();
	     };
	  iptr = riptr;
	  pushs (set);
	  pushq (parse_set_star);
	};

public void parse_non_set_star ()
	{ char *set = pops ();
	  char *riptr = iptr;
	  char *tiptr = iptr;
	  int len = 0;
	  int count;

	  while (!in_set (tiptr, set) && (*tiptr != EOFCHAR))
	     { tiptr++; len++; };
	  for (count = 0; count <= len; count++)
	     { strncpy (strstore, riptr, count);
	       strstore[count] = '\0';
	       iptr = riptr+count;
	       callq ();
	     };
	  iptr = riptr;
	  pushs (set);
	  pushq (parse_non_set_star);
	};

/*
   Parse one or more characters from the set read with tops (). When the
   first character is not in the set a syntax error is recorded and only
   this routine is pushed back. Otherwise the set is popped and every
   length from 1 up to the longest run of set characters is tried in
   increasing order: the matched text is copied to strstore, the read
   pointer is placed behind it and callq () is invoked. Afterwards the
   read pointer, the set and this routine are restored.
*/
public void parse_set_plus ()
{
	char *set = tops ();
	char *riptr = iptr;
	char *tiptr = iptr;
	int len = 1;
	int count;

	if (!in_set (tiptr, set)) {
		/* bounded write: a long set must not overflow the fixed size emsg */
		snprintf (emsg, sizeof (emsg), "character(s) in '%s' expected", set);
		synerror ();
		pushq (parse_set_plus);
		return;
	}
	pop (1);
	tiptr++;
	while (in_set (tiptr, set)) { tiptr++; len++; }
	for (count = 1; count <= len; count++) {
		strncpy (strstore, riptr, count);
		strstore[count] = '\0';
		iptr = riptr + count;
		callq ();
	}
	iptr = riptr;
	pushs (set);
	pushq (parse_set_plus);
}

/*
   Parse one or more characters that are neither in the set read with
   tops () nor the EOFCHAR terminator. When the first character does not
   qualify a syntax error is recorded and only this routine is pushed
   back. Otherwise the set is popped and every length from 1 up to the
   longest qualifying run is tried in increasing order: the matched text
   is copied to strstore, the read pointer is placed behind it and
   callq () is invoked. Afterwards the read pointer, the set and this
   routine are restored.
*/
public void parse_non_set_plus ()
{
	char *set = tops ();
	char *riptr = iptr;
	char *tiptr = iptr;
	int len = 1;
	int count;

	if (in_set (tiptr, set) || (*tiptr == EOFCHAR)) {
		/* bounded write: a long set must not overflow the fixed size emsg */
		snprintf (emsg, sizeof (emsg), "character(s) not in '%s' expected", set);
		synerror ();
		pushq (parse_non_set_plus);
		return;
	}
	pop (1);
	tiptr++;
	while (!in_set (tiptr, set) && (*tiptr != EOFCHAR)) { tiptr++; len++; }
	for (count = 1; count <= len; count++) {
		strncpy (strstore, riptr, count);
		strstore[count] = '\0';
		iptr = riptr + count;
		callq ();
	}
	iptr = riptr;
	pushs (set);
	pushq (parse_non_set_plus);
}

public void parse_set_star_strict ()
	{ char *set = pops ();
	  char *riptr = iptr;
	  char *tiptr = iptr;
	  char *dptr = strstore;
	  while (in_set (tiptr, set)) { *dptr++ = *tiptr++; }
	  *dptr = '\0';
	  iptr = tiptr;
	  callq ();
	  iptr = riptr;
	  pushs (set);
	  pushq (parse_set_star_strict);
	};

public void parse_non_set_star_strict ()
	{ char *set = pops ();
	  char *riptr = iptr;
	  char *tiptr = iptr;
	  char *dptr = strstore;
 
	  while (!in_set (tiptr, set) && (*tiptr != EOFCHAR))
	     { *dptr++ = *tiptr++; }
	  *dptr = '\0';
	  iptr = tiptr;
	  callq ();
	  iptr = riptr;
	  pushs (set);
	  pushq (parse_non_set_star_strict);
	};

/*
   Strict variant of parse_set_plus: only the longest run of characters
   from the set read with tops () is tried. When the first character is
   not in the set a syntax error is recorded and only this routine is
   pushed back. Otherwise the set is popped, the run is copied to
   strstore and the read pointer placed behind it for the duration of
   callq (); afterwards the read pointer, the set and this routine are
   restored.
*/
public void parse_set_plus_strict ()
{
	char *set = tops ();
	char *riptr = iptr;
	char *tiptr = iptr;
	char *dptr;

	if (!in_set (tiptr, set)) {
		/* bounded write: a long set must not overflow the fixed size emsg */
		snprintf (emsg, sizeof (emsg), "character(s) in '%s' expected", set);
		synerror ();
		pushq (parse_set_plus_strict);
		return;
	}
	pop (1);
	dptr = strstore;
	/* the first character is already known to match, hence do/while */
	do { *dptr++ = *tiptr++; }
	while (in_set (tiptr, set));
	*dptr = '\0';
	iptr = tiptr;
	callq ();
	iptr = riptr;
	pushs (set);
	pushq (parse_set_plus_strict);
}

/*
   Strict variant of parse_non_set_plus: only the longest run of
   characters that are neither in the set read with tops () nor the
   EOFCHAR terminator is tried. When the first character does not qualify
   a syntax error is recorded and only this routine is pushed back.
   Otherwise the set is popped, the run is copied to strstore and the
   read pointer placed behind it for the duration of callq (); afterwards
   the read pointer, the set and this routine are restored.
*/
public void parse_non_set_plus_strict ()
{
	char *set = tops ();
	char *riptr = iptr;
	char *tiptr = iptr;
	char *dptr;

	if (in_set (tiptr, set) || (*tiptr == EOFCHAR)) {
		/* bounded write: a long set must not overflow the fixed size emsg */
		snprintf (emsg, sizeof (emsg), "character(s) not in '%s' expected", set);
		synerror ();
		pushq (parse_non_set_plus_strict);
		return;
	}
	pop (1);
	dptr = strstore;
	/* the first character is already known to qualify, hence do/while */
	do { *dptr++ = *tiptr++; }
	while (!in_set (tiptr, set) && (*tiptr != EOFCHAR));
	*dptr = '\0';
	iptr = tiptr;
	callq ();
	iptr = riptr;
	pushs (set);
	pushq (parse_non_set_plus_strict);
}

/*
   Reset the bookkeeping for a new parse: both counters go to zero, the
   probable error message is emptied and the farthest point is set to the
   start of the parse buffer.
*/
public void reinit_textparsing ()
{
	match_count = 0;
	nrofparses = 0;
	pemsg[0] = '\0';
	eptr = parsebuffer;
}

/*
   Allocate the parse buffer and the string store with their configured
   sizes, then reset the parse bookkeeping.
*/
public void init_textparsing ()
{
	parsebuffer = (char *) ckmalloc (parsebuffer_size);
	strstore = (char *) ckmalloc (strstore_size);

	reinit_textparsing ();
}

/*
   Report on the number of parses found. With no parse at all the
   probable error message is logged, followed (when a farthest point is
   known) by either " end of input" or the rest of the input line that
   starts at the farthest point. With more than one parse the count is
   logged. Exactly one parse produces no output.
*/
public void complain_on_found_parses ()
{
	char *p;

	if (nrofparses == 0) {
		wlog ("No parse was found, due to: %s", pemsg);
		if (eptr != (char *) 0) {
			eprint_log ("Farthest point reached was");
			if (*eptr == EOFCHAR) {
				eprint_log (" end of input\n");
			} else {
				eprint_log (":\n");
				/* echo up to the end of the line or of the input */
				p = eptr;
				while ((*p) && (*p != '\n') && (*p != EOFCHAR)) {
					eprint_log ("%c", *p);
					p++;
				}
				eprint_log ("\n");
			}
		}
	}
	if (nrofparses > 1) {
		wlog ("%d parses found", nrofparses);
	}
}

/*
   Log how many terminal matches parse_terminal has attempted since the
   counter was last reset.
*/
public void dump_matches ()
{
	wlog ("\n%d matches tried", match_count);
}

/*
   Recognition of meta rules
*/
/* current read position in the string being matched by the recbody_ routines */
public char *miptr;
/* running total adjusted by recbody_parse_number and tested by meta_endofnumber */
public int mtotal;

/*
   A meta rule has been recognized once the end of the string is reached:
   only then is callq () invoked. miptr is saved across that call so that
   backtracking resumes from the right position. The routine pushes
   itself again in either case.
*/
public void meta_endofsentence ()
{
	char *saved = miptr;

	if (*saved == '\0') {
		callq ();
		miptr = saved;
	}
	pushq (meta_endofsentence);
}

/*
   Numeric counterpart of meta_endofsentence: callq () is invoked only
   when mtotal is zero, and mtotal is set back to zero after the call.
   The routine pushes itself again in either case.
*/
public void meta_endofnumber ()
{
	if (mtotal != 0) {
		pushq (meta_endofnumber);
		return;
	}
	callq ();
	mtotal = 0;
	pushq (meta_endofnumber);
}

public void recbody_parse_terminal ()
	{ char *terminal = tops();
	  char *riptr = miptr;
	  register char *tptr, *sptr;
	  for (tptr = terminal, sptr = riptr; *tptr; tptr++, sptr++)
	     if (*tptr != *sptr)
		{ /* eventueel een error melding */
		  pushq (recbody_parse_terminal);
	          return;
		};
	  pop(1);
	  miptr = sptr;
	  callq();
	  miptr = riptr;
	  pushs (terminal);
	  pushq (recbody_parse_terminal);
	};

/*
   Subtract the number obtained with popi () from mtotal for the duration
   of callq (), then add it back and push the number and this routine
   again.
*/
public void recbody_parse_number ()
{
	int amount = popi ();

	mtotal -= amount;
	/* no test on the sign of mtotal: callq () is always invoked */
	callq ();
	mtotal += amount;
	pushi (amount);
	pushq (recbody_parse_number);
}

public void recbody_parse_set ()
	{ char *set = tops ();
	  char *riptr = miptr;
	  if (!in_set (riptr, set))
	     { pushq (recbody_parse_set);
	       return;
	     };
	  miptr++;
	  pop(1);
	  callq ();
	  miptr = riptr;
	  pushs (set);
	  pushq (recbody_parse_set);
	};

public void recbody_parse_non_set ()
	{ char *set = tops ();
	  char *riptr = miptr;
	  if (in_set (riptr, set) || !(*riptr))
	     { pushq (recbody_parse_non_set);
	       return;
	     };
	  miptr++;
	  pop(1);
	  callq ();
	  miptr = riptr;
	  pushs (set);
	  pushq (recbody_parse_non_set);
	};

public void recbody_parse_set_star ()
	{ char *set = pops ();
	  char *riptr = miptr;
	  char *tiptr = miptr;
	  int len = 0;
	  int count;

	  while (in_set (tiptr, set)) { tiptr++; len++; };
	  for (count = 0; count <= len; count++)
	     { miptr = riptr+count;
	       callq ();
	     };
	  miptr = riptr;
	  pushs (set);
	  pushq (recbody_parse_set_star);
	};

public void recbody_parse_non_set_star ()
	{ char *set = pops ();
	  char *riptr = miptr;
	  char *tiptr = miptr;
	  int len = 0;
	  int count;

	  while (!in_set (tiptr, set) && *tiptr) { tiptr++; len++; };
	  for (count = 0; count <= len; count++)
	     { miptr = riptr+count;
	       callq ();
	     };
	  miptr = riptr;
	  pushs (set);
	  pushq (recbody_parse_non_set_star);
	};

public void recbody_parse_set_plus ()
	{ char *set = tops ();
	  char *riptr = miptr;
	  char *tiptr = miptr;
	  int len = 1;
	  int count;
	  if (!in_set (tiptr, set))
	     { pushq (recbody_parse_set_plus);
	       return;
	     };
	  pop(1);
	  tiptr++;
	  while (in_set (tiptr, set)) { tiptr++; len++; };
	  for (count = 1; count <= len; count++)
	     { miptr = riptr+count;
	       callq ();
	     };
	  miptr = riptr;
	  pushs (set);
	  pushq (recbody_parse_set_plus);
	};

/*
   Match one or more characters at miptr that are not in the set read
   with tops (), stopping at the end of the string. When the first
   character does not qualify only this routine is pushed back. Otherwise
   the set is popped and every length from 1 up to the longest qualifying
   run is tried in increasing order, invoking callq () with miptr placed
   behind the matched part. Afterwards miptr, the set and this routine
   are restored.
*/
public void recbody_parse_non_set_plus ()
{
	char *set = tops ();
	char *riptr = miptr;
	char *tiptr = miptr;
	int len = 1;
	int count;

	if (in_set (tiptr, set) || !(*tiptr)) {
		pushq (recbody_parse_non_set_plus);
		return;
	}
	pop (1);
	tiptr++;
	/* extend the run over characters NOT in the set; the test used to be
	   un-negated, which limited every match to a single character */
	while (!in_set (tiptr, set) && *tiptr) { tiptr++; len++; }
	for (count = 1; count <= len; count++) {
		miptr = riptr + count;
		callq ();
	}
	miptr = riptr;
	pushs (set);
	pushq (recbody_parse_non_set_plus);
}

public void recbody_parse_set_star_strict ()
	{ char *set = pops ();
	  char *riptr = miptr;
	  char *tiptr = miptr;
	  while (in_set (tiptr, set)) tiptr++;
	  miptr = tiptr;
	  callq ();
	  miptr = riptr;
	  pushs (set);
	  pushq (recbody_parse_set_star_strict);
	};

public void recbody_parse_non_set_star_strict ()
	{ char *set = pops ();
	  char *riptr = miptr;
	  char *tiptr = miptr;
	  while (!in_set (tiptr, set) && *tiptr) tiptr++;
	  miptr = tiptr;
	  callq ();
	  miptr = riptr;
	  pushs (set);
	  pushq (recbody_parse_non_set_star_strict);
	};

public void recbody_parse_set_plus_strict ()
	{ char *set = tops ();
	  char *riptr = miptr;
	  char *tiptr = miptr;
 
	  if (!in_set (tiptr, set))
	     { pushq (recbody_parse_set_plus_strict);
	       return;
	     };
	  pop(1);
	  do { tiptr++; }
	  while (in_set (tiptr, set));
	  miptr = tiptr;
	  callq ();
	  miptr = riptr;
	  pushs (set);
	  pushq (recbody_parse_set_plus_strict);
	};

public void recbody_parse_non_set_plus_strict ()
	{ char *set = tops ();
	  char *riptr = miptr;
	  char *tiptr = miptr;
 
	  if (in_set (tiptr, set) || !(*tiptr))
	     { pushq (recbody_parse_non_set_plus_strict);
	       return;
	     };
	  pop(1);
	  do { tiptr++; }
	  while (!in_set (tiptr, set) && *tiptr);
	  miptr = tiptr;
	  callq ();
	  miptr = riptr;
	  pushs (set);
	  pushq (recbody_parse_non_set_plus_strict);
	};

/*
   Recognize a terminal in a value. callq () is invoked when the value
   obtained with popv () is undefined, or when it is a string equal to
   the terminal obtained with pops (). Values of any other type never
   reach callq (). The value, the terminal and this routine are pushed
   back before returning.
*/
public void rec_parse_terminal ()
{
	char *terminal = pops ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if ((val -> type == stringtype) &&
		   (strcmp (val -> v.string, terminal) == 0)) {
		callq ();
	}
	pushv (val);
	pushs (terminal);
	pushq (rec_parse_terminal);
}

/*
   Recognize a number in a value. callq () is invoked when the value
   obtained with popv () is undefined, or when it is a number equal to
   the one obtained with popi (). Values of any other type never reach
   callq (). The value, the number and this routine are pushed back
   before returning.
*/
public void rec_parse_number ()
{
	int wanted = popi ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if ((val -> type == numbertype) && (val -> v.number == wanted)) {
		callq ();
	}
	pushv (val);
	pushi (wanted);
	pushq (rec_parse_number);
}

/*
   Recognize a single set character in a value. An undefined value leads
   straight to callq (). For a string value miptr is pointed at the
   string and meta_endofsentence, the set and recbody_parse_set are
   pushed before callq () is invoked. Values of any other type never
   reach callq (). The value, the set and this routine are pushed back
   before returning.
*/
public void rec_parse_set ()
{
	char *set = pops ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if (val -> type == stringtype) {
		miptr = val -> v.string;
		pushq (meta_endofsentence);
		pushs (set);
		pushq (recbody_parse_set);
		callq ();
		pop (3);	/* presumably removes the three entries pushed above */
	}
	pushv (val);
	pushs (set);
	pushq (rec_parse_set);
}

/*
   Recognize a single non-set character in a value. An undefined value
   leads straight to callq (). For a string value miptr is pointed at the
   string and meta_endofsentence, the set and recbody_parse_non_set are
   pushed before callq () is invoked. Values of any other type never
   reach callq (). The value, the set and this routine are pushed back
   before returning.
*/
public void rec_parse_non_set ()
{
	char *set = pops ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if (val -> type == stringtype) {
		miptr = val -> v.string;
		pushq (meta_endofsentence);
		pushs (set);
		pushq (recbody_parse_non_set);
		callq ();
		pop (3);	/* presumably removes the three entries pushed above */
	}
	pushv (val);
	pushs (set);
	pushq (rec_parse_non_set);
}

/*
   Recognize zero or more set characters in a value. An undefined value
   leads straight to callq (). For a string value miptr is pointed at the
   string and meta_endofsentence, the set and recbody_parse_set_star are
   pushed before callq () is invoked. Values of any other type never
   reach callq (). The value, the set and this routine are pushed back
   before returning.
*/
public void rec_parse_set_star ()
{
	char *set = pops ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if (val -> type == stringtype) {
		miptr = val -> v.string;
		pushq (meta_endofsentence);
		pushs (set);
		pushq (recbody_parse_set_star);
		callq ();
		pop (3);	/* presumably removes the three entries pushed above */
	}
	pushv (val);
	pushs (set);
	pushq (rec_parse_set_star);
}

/*
   Recognize zero or more non-set characters in a value. An undefined
   value leads straight to callq (). For a string value miptr is pointed
   at the string and meta_endofsentence, the set and
   recbody_parse_non_set_star are pushed before callq () is invoked.
   Values of any other type never reach callq (). The value, the set and
   this routine are pushed back before returning.
*/
public void rec_parse_non_set_star ()
{
	char *set = pops ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if (val -> type == stringtype) {
		miptr = val -> v.string;
		pushq (meta_endofsentence);
		pushs (set);
		pushq (recbody_parse_non_set_star);
		callq ();
		pop (3);	/* presumably removes the three entries pushed above */
	}
	pushv (val);
	pushs (set);
	pushq (rec_parse_non_set_star);
}

/*
   Recognize one or more set characters in a value. An undefined value
   leads straight to callq (). For a string value miptr is pointed at the
   string and meta_endofsentence, the set and recbody_parse_set_plus are
   pushed before callq () is invoked. Values of any other type never
   reach callq (). The value, the set and this routine are pushed back
   before returning.
*/
public void rec_parse_set_plus ()
{
	char *set = pops ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if (val -> type == stringtype) {
		miptr = val -> v.string;
		pushq (meta_endofsentence);
		pushs (set);
		pushq (recbody_parse_set_plus);
		callq ();
		pop (3);	/* presumably removes the three entries pushed above */
	}
	pushv (val);
	pushs (set);
	pushq (rec_parse_set_plus);
}

/*
   Recognize one or more non-set characters in a value. An undefined
   value leads straight to callq (). For a string value miptr is pointed
   at the string and meta_endofsentence, the set and
   recbody_parse_non_set_plus are pushed before callq () is invoked.
   Values of any other type never reach callq (). The value, the set and
   this routine are pushed back before returning.
*/
public void rec_parse_non_set_plus ()
{
	char *set = pops ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if (val -> type == stringtype) {
		miptr = val -> v.string;
		pushq (meta_endofsentence);
		pushs (set);
		pushq (recbody_parse_non_set_plus);
		callq ();
		pop (3);	/* presumably removes the three entries pushed above */
	}
	pushv (val);
	pushs (set);
	pushq (rec_parse_non_set_plus);
}

/*
   Strictly recognize zero or more set characters in a value. An
   undefined value leads straight to callq (). For a string value miptr
   is pointed at the string and meta_endofsentence, the set and
   recbody_parse_set_star_strict are pushed before callq () is invoked.
   Values of any other type never reach callq (). The value, the set and
   this routine are pushed back before returning.
*/
public void rec_parse_set_star_strict ()
{
	char *set = pops ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if (val -> type == stringtype) {
		miptr = val -> v.string;
		pushq (meta_endofsentence);
		pushs (set);
		pushq (recbody_parse_set_star_strict);
		callq ();
		pop (3);	/* presumably removes the three entries pushed above */
	}
	pushv (val);
	pushs (set);
	pushq (rec_parse_set_star_strict);
}

/*
   Strictly recognize zero or more non-set characters in a value. An
   undefined value leads straight to callq (). For a string value miptr
   is pointed at the string and meta_endofsentence, the set and
   recbody_parse_non_set_star_strict are pushed before callq () is
   invoked. Values of any other type never reach callq (). The value, the
   set and this routine are pushed back before returning.
*/
public void rec_parse_non_set_star_strict ()
{
	char *set = pops ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if (val -> type == stringtype) {
		miptr = val -> v.string;
		pushq (meta_endofsentence);
		pushs (set);
		pushq (recbody_parse_non_set_star_strict);
		callq ();
		pop (3);	/* presumably removes the three entries pushed above */
	}
	pushv (val);
	pushs (set);
	pushq (rec_parse_non_set_star_strict);
}

/*
   Strictly recognize one or more set characters in a value. An undefined
   value leads straight to callq (). For a string value miptr is pointed
   at the string and meta_endofsentence, the set and
   recbody_parse_set_plus_strict are pushed before callq () is invoked.
   Values of any other type never reach callq (). The value, the set and
   this routine are pushed back before returning.
*/
public void rec_parse_set_plus_strict ()
{
	char *set = pops ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if (val -> type == stringtype) {
		miptr = val -> v.string;
		pushq (meta_endofsentence);
		pushs (set);
		pushq (recbody_parse_set_plus_strict);
		callq ();
		pop (3);	/* presumably removes the three entries pushed above */
	}
	pushv (val);
	pushs (set);
	pushq (rec_parse_set_plus_strict);
}

/*
   Strictly recognize one or more non-set characters in a value. An
   undefined value leads straight to callq (). For a string value miptr
   is pointed at the string and meta_endofsentence, the set and
   recbody_parse_non_set_plus_strict are pushed before callq () is
   invoked. Values of any other type never reach callq (). The value, the
   set and this routine are pushed back before returning.
*/
public void rec_parse_non_set_plus_strict ()
{
	char *set = pops ();
	valuenode val = popv ();

	if (val -> type == undefinedtype) {
		callq ();
	} else if (val -> type == stringtype) {
		miptr = val -> v.string;
		pushq (meta_endofsentence);
		pushs (set);
		pushq (recbody_parse_non_set_plus_strict);
		callq ();
		pop (3);	/* presumably removes the three entries pushed above */
	}
	pushv (val);
	pushs (set);
	pushq (rec_parse_non_set_plus_strict);
}
