/* Implementation of lexical analysis module.
 *
 * Copyright 2000 KUN.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Library General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/* $Id: rtslex.c,v 1.58 2003/10/31 14:59:49 pspiertz Exp $ */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */

#ifdef HAVE_MALLOC_H
#include <malloc.h>
#else /* HAVE_MALLOC_H */
#include <stdlib.h>
#endif /* HAVE_MALLOC_H */

#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <sys/types.h>
#include "pattern.h"
#include "rtsutil.h"
#include "rtsio.h"
#include "rtsesc.h"
#include "rtscode.h"
#include "rtslex.h"
#ifdef PMRTS
#include "posmemo.h"
#endif /* PMRTS */

/* see match_prio1_tokens */
#define MATCH_TOKENS_WITH_PREFERENCE 0

#define USING_MATCH_LIST 0

#ifdef DEBUG
#undef MEM_STATS
#undef COUNT_TRACE
#undef PARTS_TRACE
#undef STATE_TRACE
#undef LEXI_TRACE
#undef NONTNR_TRACE
#undef DEBUG_NONTNR
#undef DEBUG_NONT_CLASSES
#undef PRINT_PARTS_SEMICOLON
#undef PRINT_STATE_PTRS
#undef SHOW_RE_LISTS
#undef SHOW_LEXINFO_NRS
#undef SHOW_ZERO_DEST
#define DB(x) x
#else
#define DB(x)
#endif
/* may also compile with -DSTANDALONE_LEXER for test environment */

#ifdef COUNT_TRACE
long n_trel_builds = 0;
long n_parts_tok_returns = 0;
#endif

/*
 * BUG!
 */
#define lex_print_formatted printf

/*
//------------------------------------------------------------------------------
// Token types and markers
//------------------------------------------------------------------------------
*/

/* LexemeType defined in rtslex.h */

/* Separator requirement between adjacent tokens.
 * NOTE(review): no use of this type is visible in this part of the file;
 * the meaning of the enumerators is inferred from their names only. */
typedef enum
{
  SepRequired,
  /* SepNoBlank, */
  SepDontCare
} SeparatorType;

/* enum *Mark moved to .h */

/* Translate a lexeme type into its one-byte marker.
 * Types without a dedicated marker map to EmptyMark. */
unsigned char
get_lex_mark(LexemeType lex_type)
{
  switch (lex_type) {
    case Prefix:     return PrefixMark;
    case Infix:      return InfixMark;
    case Suffix:     return SuffixMark;
    case MultiToken: return MultiTokenMark;
    default:         return EmptyMark;
  }
}

/*
//------------------------------------------------------------------------------
// Global data
//------------------------------------------------------------------------------
*/

/* Lexer description currently in use; assigned by init_lexer(). */
static const LexInfo*        lex_info;

/* Name of this source file (not referenced in the visible part of the file). */
static const char module_name[]        = "rtslex.c";

/* Per-character lookup tables, filled by init_char_tables().
 * A non-zero entry in the first three tables means the character belongs
 * to that class; translate_table maps a character to its replacement. */
static unsigned char        blank_table[256];
static unsigned char        terminator_table[256];
static unsigned char        invisible_table[256];
static unsigned char        translate_table[256];
/* List delimiters, indexed by the Delimiter enum (Opener/Middler/Closer). */
static unsigned char        delimiter_table[3];

/* Compiled $MATCH/$SKIP regular expressions and, in parallel arrays,
 * the lexeme type derived for each of them (see compile_regexps()). */
static RegExp**       match_table_p;
static RegExp**       skip_table_p;
static LexemeType	*skip_regexp_lex_types;
static LexemeType	*match_regexp_lex_types;

#if NONT_CLASSES && 0	/* moved to .h */
static int gr_term_class;
static int re_match_class;
static int re_skip_class;
#endif
/* Number of transition-list classes per StateNode; set by init_lexer(). */
int NR_classes; /* = NR_nont + 3 */

/* Bookkeeping for one pool of equally sized objects.
 * Released objects are chained through their first pointer-sized bytes
 * (see free_mem()) and reused by alloc_mem(). */
typedef struct
{
  size_t        obj_size;   /* size passed to GetMem for a new object */
  void*         free_list;  /* head of the chain of released objects */
  unsigned      requested;  /* number of alloc_mem() calls */
  unsigned      allocated;  /* number of objects obtained from GetMem */
  unsigned      freed;      /* number of objects returned by end_mem_info() */
} MemInfo;

/* One pool per object kind; sized in init_memory_manager().
 * NOTE(review): statenode_mem and translist_mem exist only when
 * NONT_CLASSES is non-zero, yet several functions in this file use them
 * unconditionally. */
static MemInfo        trellis_mem;
#if NONT_CLASSES
static MemInfo        statenode_mem;
static MemInfo        translist_mem;
#endif
static MemInfo        transition_mem;
static MemInfo        neg_memo_mem;
static MemInfo        pos_memo_mem;

#ifdef SHOW_LEXINFO_NRS
/* Debug aid: print the counts stored in lex_info to stdout, labelled with
 * the call site `where`.
 * NOTE(review): every count is printed with %d, while
 * get_nr_syntax_nonterminals() returns the same field as unsigned long;
 * confirm the specifiers against the LexInfo declaration. */
static void show_lexinfo_nrs(char *where)
{
  printf("lex_info nrs at %s:\n", where);
#  if NONT_CLASSES
  printf("nr_nont_classes=%d ", lex_info->nr_nont_classes);
#  endif
  printf("nr_terminals=%d ", lex_info->nr_terminals);
  printf("nr_matches=%d ", lex_info->nr_matches);
  printf("nr_skips=%d ", lex_info->nr_skips);
  printf("nr_neg_memos=%d ", lex_info->nr_neg_memos);
  printf("nr_syntax_nonterminals=%d ", lex_info->nr_syntax_nonterminals);
  printf("\n");
}
#endif

/*
//------------------------------------------------------------------------------
// Module interface implementation
//------------------------------------------------------------------------------
*/

/* Disabled output helpers (compiled out by #if 0).
 * If re-enabled, note that lex_printf() formats with vsprintf() into a
 * fixed MAX_STR_LEN buffer without any bound check. */
#if 0
static void
lex_abort(const char* str)
{
  AGFL_abort(str);
}

static void
lex_print(const char* str)
{
  AGFL_print(str);
}

enum { MAX_STR_LEN = 8192 };

static void
lex_printf(const char* fmt, ...)
{
  char buf[MAX_STR_LEN];
  va_list argp;
  va_start(argp, fmt);
  vsprintf(buf, fmt, argp);
  va_end(argp);
  lex_print(buf);
}
#endif

/* Report an unknown token through the optional log_unknown callback of
 * lex_info; does nothing when no callback is installed. */
static void
log_unknown_token(unsigned pos, const unsigned char* str, unsigned len)
{
    if (lex_info->log_unknown == NULL) {
        return;
    }
    lex_info->log_unknown(pos, (char*)str, len);
}

/* Number of negative memos per state, as recorded in lex_info. */
unsigned
get_nr_neg_memos(void)
  { return lex_info->nr_neg_memos; }

/* Number of syntax nonterminals recorded in lex_info.  The same value is
 * used as the number of positive memos via the alias defined below. */
unsigned long
get_nr_syntax_nonterminals(void)
{
  return lex_info->nr_syntax_nonterminals;
}
#define get_nr_pos_memos get_nr_syntax_nonterminals

/* Number of grammar terminals recorded in lex_info. */
unsigned
get_nr_terminals(void)
{
  return lex_info->nr_terminals;
}

/* Number of match regular expressions recorded in lex_info. */
static unsigned
get_nr_matches(void)
{
  return lex_info->nr_matches;
}

/* Number of skip regular expressions recorded in lex_info. */
static unsigned
get_nr_skips(void)
{
  return lex_info->nr_skips;
}

/* Text of grammar terminal number i, viewed as unsigned characters.
 * No range check is performed on i. */
const unsigned char*
get_terminal(unsigned i)
{
  return (const unsigned char*)lex_info->terminals[i];
}

/* Lexicon attached to the active LexInfo.
 * Declared with (void) so the definition is a real prototype, like the
 * other accessors in this file. */
static const LEXICON*
get_lexicon(void)
{
    return lex_info->lexicon;
}

/* Trie attached to the active LexInfo.
 * Declared with (void) so the definition is a real prototype, like the
 * other accessors in this file. */
static const Trie*
get_trie(void)
{
    return lex_info->trie;
}

/* Source texts of the match regular expressions (get_nr_matches() entries).
 * Declared with (void) so the definition is a real prototype. */
static const char**
get_matches(void)
{
    return lex_info->matches;
}

/* Source texts of the skip regular expressions (get_nr_skips() entries).
 * Declared with (void) so the definition is a real prototype. */
static const char**
get_skips(void)
{
    return lex_info->skips;
}

/* Terminal that lex_info designates as end-of-sentence. */
Terminal
get_eos_terminal(void)
{
  return lex_info->eos_terminal;
}

/* Blank characters configured in lex_info, as unsigned characters. */
static const unsigned char*
get_blanks(void)
{
  return (const unsigned char*)lex_info->blanks;
}

/* Terminator characters configured in lex_info, as unsigned characters. */
static const unsigned char*
get_terminators(void)
{
  return (const unsigned char*)lex_info->terminators;
}

/* Invisible characters configured in lex_info, as unsigned characters. */
static const unsigned char*
get_invisibles(void)
{
  return (const unsigned char*)lex_info->invisibles;
}

/* List delimiter characters configured in lex_info (may be NULL, which
 * set_list_delimiters() handles), as unsigned characters. */
static const unsigned char*
get_delimiters(void)
{
  return (const unsigned char*)lex_info->delimiters;
}

/* Source characters of the translation table, as unsigned characters. */
static const unsigned char*
get_translate_src(void)
{
  return (const unsigned char*)lex_info->translate_src;
}

/* Replacement characters of the translation table, as unsigned characters. */
static const unsigned char*
get_translate_dst(void)
{
  return (const unsigned char*)lex_info->translate_dst;
}

/*
//------------------------------------------------------------------------------
// Memory manager
//------------------------------------------------------------------------------
*/

/* Prepare a pool for objects of obj_size bytes: empty free list, all
 * counters zero.
 *
 * free_mem() stores the free-list link in the first sizeof(void*) bytes
 * of a released object, so the pool never hands out objects smaller than
 * a pointer; a smaller request is rounded up. */
static void
init_mem_info(MemInfo* mem_info, size_t obj_size)
{
  if (obj_size < sizeof(void*)) {
    obj_size = sizeof(void*);
  }
  mem_info->obj_size = obj_size;
  mem_info->free_list = NULL;
  mem_info->requested = 0;
  mem_info->allocated = 0;
  mem_info->freed = 0;
}

/* Hand every object on the pool's free list back with FreeMem, counting
 * each one in `freed`, and leave the list empty.  Objects still in use by
 * callers are not touched. */
static void
end_mem_info(MemInfo* mem_info)
{
  void* node;
  void* following;
  for (node = mem_info->free_list; node != NULL; node = following) {
    following = *(void**)node;   /* read the link before releasing */
    FreeMem(node, "end_mem_info");
    mem_info->freed++;
  }
  mem_info->free_list = NULL;
}

/* Obtain one object from the pool: reuse the head of the free list when
 * there is one, otherwise get fresh memory of obj_size bytes.  The
 * contents of the returned object are not initialised here. */
static void*
alloc_mem(MemInfo* mem_info)
{
  void* obj = mem_info->free_list;
  if (obj != NULL) {
    /* unlink the recycled object */
    mem_info->free_list = *(void**)obj;
  } else {
    obj = GetMem(mem_info->obj_size, "alloc_mem");
    mem_info->allocated++;
  }
  mem_info->requested++;
  return obj;
}

/* Return an object to the pool by pushing it on the free list.  The first
 * pointer-sized bytes of the object are overwritten with the link. */
static void
free_mem(MemInfo* mem_info, void* mem)
{
  void** link = (void**)mem;
  *link = mem_info->free_list;
  mem_info->free_list = link;
}

#ifdef MEM_STATS
/* Print the usage counters of one pool, labelled with str.
 * The counters are `unsigned`, so they are printed with %u. */
static void
show_mem_info(const MemInfo* mem_info, const char* str)
{
  rtsMessage("Memory stats for %s:\n"
             "  requested %u\n"
             "  allocated %u\n"
             "  freed     %u\n",
             str, mem_info->requested, mem_info->allocated, mem_info->freed);
}
#endif /* MEM_STATS */

/* Set up the object pools used by the lexer.
 * nr_nont_classes is the number of transition lists per StateNode; it
 * determines the size of one translist object.
 * NOTE(review): statenode_mem and translist_mem are declared only under
 * #if NONT_CLASSES but are initialised here unconditionally.
 * NOTE(review): get_nr_pos_memos() returns unsigned long and is stored in
 * an unsigned. */
static void
init_memory_manager(int nr_nont_classes)
{
  unsigned nr_neg_memos = get_nr_neg_memos();
#ifdef PMRTS
  unsigned nr_pos_memos = get_nr_pos_memos();
#endif /* PMRTS */

  init_mem_info(&trellis_mem, sizeof(Trellis));
  /* one pointer-sized slot followed by nr_neg_memos NegMemo entries */
  init_mem_info(&neg_memo_mem, sizeof(void*) + nr_neg_memos * sizeof(NegMemo));
#ifdef PMRTS
  init_mem_info(&pos_memo_mem, nr_pos_memos * sizeof(PosMemo));
#endif /* PMRTS */
  init_mem_info(&statenode_mem, sizeof(StateNode));
  init_mem_info(&translist_mem, nr_nont_classes * sizeof(Transition *));
  init_mem_info(&transition_mem, sizeof(Transition));
}

/* Release the free lists of all pools and, with MEM_STATS, print their
 * counters.
 * pos_memo_mem is initialised only under PMRTS but torn down here in all
 * builds; as a static object it starts zeroed, so its free list is NULL
 * unless something was released into it (no such use is visible in this
 * part of the file). */
static void
end_memory_manager(void)
{
    end_mem_info(&neg_memo_mem);
    end_mem_info(&pos_memo_mem);
    end_mem_info(&statenode_mem);
    end_mem_info(&translist_mem);
    end_mem_info(&transition_mem);
    end_mem_info(&trellis_mem);
#ifdef MEM_STATS
    show_mem_info(&neg_memo_mem,  "neg_memo tables");
    show_mem_info(&pos_memo_mem,  "posmemo tables");
    show_mem_info(&statenode_mem, "StateNodes");
    show_mem_info(&translist_mem, "TransitionLists");
    show_mem_info(&transition_mem, "Transitions");
    show_mem_info(&trellis_mem, "trelles");        /* FN: plural? */
#endif /* MEM_STATS */
}

static NegMemo*
alloc_neg_memos(void)
{
  NegMemo* neg_memos = (NegMemo*)alloc_mem(&neg_memo_mem);
  return neg_memos;
}

/* Give a negative-memo table back to its pool for reuse. */
static void
free_neg_memos(NegMemo* neg_memos)
  { free_mem(&neg_memo_mem, neg_memos); }

#ifdef PMRTS
/* Allocate an array with one PosMemo per syntax nonterminal directly from
 * GetMem (the pos_memo pool is not used).  The entries are not
 * initialised here. */
static PosMemo*
alloc_pos_memos(void)
{
    long nbytes = get_nr_syntax_nonterminals() * sizeof(PosMemo);
    return (PosMemo*) GetMem(nbytes, "alloc_pos_memos");
}

/* Release every entry of a PosMemo array with posmemo_free_vec() and then
 * the array itself. */
static void
free_pos_memos(PosMemo* pos_memos)
{
    long count = get_nr_syntax_nonterminals();
    long idx = 0;
    while (idx < count) {
        posmemo_free_vec(&(pos_memos[idx]));
        idx++;
    }
    FreeMem(pos_memos, "free_pos_memos");
}
#endif /* PMRTS */

Transition*
alloc_transition(void)
{
  Transition* transition = (Transition*)alloc_mem(&transition_mem);
  return transition;
}

/* Give a Transition back to its pool.  Only the struct is recycled. */
void
free_transition(Transition* transition)
  { free_mem(&transition_mem, transition); }

/* Take one array of per-class transition list heads from its pool
 * (contents uninitialised). */
static Transition**
alloc_translist(void)
{
  return (Transition**)alloc_mem(&translist_mem);
}

/* Give an array of transition list heads back to its pool. */
static void
free_translist(Transition** translist)
{
  free_mem(&translist_mem, translist);
}

/*
// If there is a StateNode at a certain pos, at least one of the
// classes must have a transition.
*/

static StateNode*
alloc_statenode(void)
{
  StateNode* statenode = (StateNode*)alloc_mem(&statenode_mem);
  statenode->trans_lists = alloc_translist();
  return statenode;
}

/* Recycle a StateNode: first its transition-list array, then the node.
 * The array must go first because the node's storage is reused for the
 * free-list link. */
static void
free_statenode(StateNode* statenode)
{
    Transition** lists = statenode->trans_lists;
    free_translist(lists);
    free_mem(&statenode_mem, statenode);
}

/* Reset a StateNode for input position pos: all NR_classes transition
 * lists empty and no memo tables attached. */
static void
init_statenode(StateNode* statenode, Position pos)
{
    Transition** lists = statenode->trans_lists;
    int idx;

    for (idx = 0; idx < NR_classes; idx++)
    {
      lists[idx] = NULL;
    }
    statenode->pos = pos;
    statenode->neg_memos = NULL;
#ifdef PMRTS
    statenode->pos_memos = NULL;
#endif /* PMRTS */
}

#ifdef PARTS_TRACE
/* Trace aid: report every non-empty transition list of a state, from the
 * highest class number down to class 0. */
static void
dump_trans_lists_ptrs(char *procname,
                        Position cur_pos, Transition** trans_lists)
{
    int class = NR_classes;
    while (--class >= 0)
    {
      Transition* head = trans_lists[class];
      if (head == NULL)
      {
        continue;
      }
      rtsMessage("%s: pos %d class %d list:%p\n",
              procname, cur_pos, class, head);
    }
}
#endif /* PARTS_TRACE */

/* Allocate an array of len StateNode pointers (not initialised here). */
static StateNode**
alloc_state_row(unsigned len)
{
    return (StateNode**)GetMem(len * sizeof(StateNode*), "alloc_state_row");
}

/* Release an array obtained from alloc_state_row().  The StateNodes the
 * entries point to are not released here. */
static void
free_state_row(StateNode** row)
  { FreeMem(row, "free_state_row"); }

static AggregatePartStates*
alloc_init_aggregate_array(unsigned len)
{
  AggregatePartStates* array
        = (AggregatePartStates*)GetMem(len * sizeof(AggregatePartStates),"alloc_init_aggregate_array");
  unsigned index;
  for (index = 0; index < len; index++)
  {
    array[index] = 0;
  }
  return array;
}

/* Release an array obtained from alloc_init_aggregate_array(). */
static void
free_aggregate_array(AggregatePartStates* array)
  { FreeMem(array, "free_aggregate_array"); }

/* Take a Trellis from its pool and attach three arrays of len entries:
 * two StateNode rows (field names depend on NONT_CLASSES) and the
 * last_part_end_from positions.  This function does not fill the arrays.
 * Release with free_trellis(). */
Trellis*
alloc_trellis(unsigned len)
{
  Trellis* trellis = (Trellis*)alloc_mem(&trellis_mem);
#if NONT_CLASSES
  trellis->states_row = alloc_state_row(len);
  trellis->pstates_row = alloc_state_row(len);
#else
  trellis->state_row = alloc_state_row(len);
  trellis->parts_row = alloc_state_row(len);
#endif
  trellis->last_part_end_from = (Position*)GetMem(len * sizeof(Position), "alloc_trellis");
  return trellis;
}

/* Release the three arrays attached by alloc_trellis() and recycle the
 * Trellis struct.  The StateNodes referenced from the rows are not
 * released here. */
static void
free_trellis(Trellis* trellis)
{
  FreeMem(trellis->last_part_end_from, "free_trellis");
#if NONT_CLASSES
  free_state_row(trellis->states_row);
  free_state_row(trellis->pstates_row);
#else
  free_state_row(trellis->state_row);
  free_state_row(trellis->parts_row);
#endif
  /* must come last: recycling overwrites the start of the struct */
  free_mem(&trellis_mem, trellis);
}

/*
//------------------------------------------------------------------------------
// Table functions
//------------------------------------------------------------------------------
*/

/* Build a 256-entry membership table: entry c holds c itself for every
 * character c occurring in str, and '\0' for all others.
 * Aborts via rtsAbort when str is NULL. */
static void
set_table(unsigned char* table, const unsigned char* str)
{
  const unsigned char* p;
  if (str == NULL) {
	rtsAbort("set_table", "called with NULL string");
  }
  memset(table, '\0', 256);
  for (p = str; *p != '\0'; p++) {
    table[*p] = *p;
  }
}

/* Build a 256-entry translation table: the identity mapping, except that
 * the n-th character of src maps to the n-th character of dst.
 *
 * Pairing stops at the end of the shorter string, so dst is never read
 * past its terminator; source characters without a partner keep the
 * identity mapping.
 * Aborts via rtsAbort when either string is NULL. */
static void
set_trans_table(unsigned char* table, const unsigned char* src, const unsigned char* dst)
{
  unsigned i;
  if (src == NULL || dst == NULL) {
	rtsAbort("set_trans_table", "called with NULL string");
  }
  for (i = 0; i < 256; i++)
    table[i] = (unsigned char)i;
  while (*src != '\0' && *dst != '\0')
    table[*src++] = *dst++;
}

/* Index into delimiter_table: which of the three list delimiter
 * characters is meant. */
typedef enum
{
  Opener = 0,
  Middler = 1,
  Closer = 2
} Delimiter;

/* Fill delimiter_table from a string holding the opener, middler and
 * closer characters in that order.
 *
 * A NULL string sets all three entries to EosMark.  A string shorter
 * than three characters is read only up to its terminator; the missing
 * entries are set to '\0', which is_delimiter() treats as "no delimiter". */
static void
set_list_delimiters(const unsigned char* delims)
{
  unsigned slot;
  if (delims == NULL)
  {
    for (slot = Opener; slot <= Closer; slot++)
    {
      delimiter_table[slot] = EosMark;
    }
    return;
  }
  for (slot = Opener; slot <= Closer; slot++)
  {
    delimiter_table[slot] = *delims;
    if (*delims != '\0')
    {
      delims++;        /* never step past the terminator */
    }
  }
}

static void
init_char_tables(void)
{
  set_table(blank_table, get_blanks());
  set_table(terminator_table, get_terminators());
  set_table(invisible_table, get_invisibles());
  set_trans_table(translate_table, get_translate_src(), get_translate_dst());
  terminator_table[EosMark] = 1;
  set_list_delimiters(get_delimiters());
}

/* Non-zero when c is the end-of-sentence marker. */
static int
is_eos(unsigned char c)
{
  return EosMark == c;
}

/* Non-zero when c is the space marker. */
static int
is_space(unsigned char c)
{
  return SpaceMark == c;
}

/* Non-zero when c is listed in the blank table. */
static int
is_blank(unsigned char c)
{
  return blank_table[c] != 0;
}

/* Non-zero when c is listed in the terminator table. */
static int
is_terminator(unsigned char c)
{
  return terminator_table[c] != 0;
}

/* Non-zero when c is listed in the invisible table. */
static int
is_invisible(unsigned char c)
{
  return invisible_table[c] != 0;
}

/* Character that c maps to according to the translation table. */
static int
translate(unsigned char c)
{
  return translate_table[c];
}

#if USING_MATCH_LIST
/* Non-zero when c equals the configured delimiter of kind delim; a '\0'
 * table entry means that delimiter is not configured. */
static int
is_delimiter(unsigned char c, Delimiter delim)
{
  unsigned char wanted = delimiter_table[delim];
  if (wanted == '\0') {
    return 0;
  }
  return c == wanted;
}
#endif

/* Classify the text [*p_txtbeg, *p_txtend) by its unescaped leading and
 * trailing hyphens and strip those hyphens from the range:
 *
 *   text-   Prefix        -text-  Infix
 *   -text   Suffix        text    SingleToken
 *
 * A trailing hyphen counts only when it is preceded by an even number of
 * backslashes (an odd number means the hyphen itself is escaped).  A
 * leading hyphen is always taken as a marker.  Ranges shorter than two
 * characters are returned unchanged as SingleToken.
 *
 * On return *p_txtbeg / *p_txtend delimit the text without the markers.
 */
LexemeType
derive_lex_type_and_strip_hyphens(char const **p_txtbeg, char const **p_txtend)
{
    char const *t_end;
    int has_endhyph = 0;

    if (*p_txtend - *p_txtbeg < 2) {
	return SingleToken;	/* avoid indexing before string */
    }

    /* Only now is it safe to form a pointer to the last character. */
    t_end = *p_txtend - 1;

    if (*t_end == '-') {
	/* Walk back over the run of backslashes directly before the hyphen. */
	char const *hp = t_end;
	while (hp > *p_txtbeg && hp[-1] == '\\') {
	    hp--;
	}
	has_endhyph = ((t_end - hp) % 2) == 0;
    }

    if (has_endhyph) {
	*p_txtend = t_end;
	if (**p_txtbeg == '-') {
	    (*p_txtbeg)++;
	    return Infix;
	}
	return Prefix;
    }
    if (**p_txtbeg == '-') {
	(*p_txtbeg)++;
	return Suffix;
    }
    return SingleToken;
} /* derive_lex_type_and_strip_hyphens */

/*
//------------------------------------------------------------------------------
// Regular expressions
//------------------------------------------------------------------------------
*/

/* Abort with a message about a regular expression that could not be
 * compiled.  A negative error code means regular expressions are not
 * supported at all; any other code reports the offending expression. */
static void
regexp_abort(char *func_name, int error, const char* regexp)
{
  if (error >= 0)
  {
    rtsAbort(func_name, "invalid regular expression: '%s'", regexp);
  }
  else
  {
    rtsAbort(func_name, "regular expressions not supported");
  }
}

/* TODO: delete compiled regexp table from generated code */

/* Allocate an array of sz lexeme types (not initialised here). */
static LexemeType*
alloc_re_lex_types(unsigned sz)
{
  size_t nbytes = sz * sizeof(LexemeType);
  return (LexemeType*)GetMem(nbytes, "alloc_re_lex_types");
}

/* Release an array obtained from alloc_re_lex_types(). */
static void
free_re_lex_types(LexemeType *re_lex_types)
  { FreeMem(re_lex_types, "free_re_lex_types"); }

/* Compile sz regular expressions.
 *
 * Each expression is first classified by its hyphen markers (see
 * derive_lex_type_and_strip_hyphens); the markers are removed before the
 * expression is passed to new_regexp().
 *
 * Returns a newly allocated table of sz compiled expressions and stores a
 * parallel, newly allocated array of lexeme types in *re_lex_types_h.
 * For sz == 0 it returns NULL and stores NULL, so the caller never keeps
 * a stale pointer.  Aborts through regexp_abort() when an expression
 * cannot be compiled.  Release both arrays with delete_regexps().
 */
static RegExp**
compile_regexps(unsigned sz, const char** regexps, LexemeType **re_lex_types_h)
{
  RegExp** table;
  LexemeType *re_lex_types;
  unsigned i;

  if (sz == 0)
  {
    if (re_lex_types_h != NULL)
    {
      *re_lex_types_h = NULL;
    }
    return NULL;
  }

  re_lex_types = alloc_re_lex_types(sz);
  *re_lex_types_h = re_lex_types;
  table = (RegExp**)GetMem(sz * sizeof(RegExp*), "compile_regexps");

  for (i = 0; i < sz; i++)
  {
    int error = 0;
    char const *re_beg = regexps[i];
    char const *re_end = re_beg + strlen(re_beg);

    re_lex_types[i] = derive_lex_type_and_strip_hyphens(&re_beg, &re_end);
#if !RE_ALSO_PART
    if (re_lex_types[i] != SingleToken) {
      rtsMessage("warning: RegExp \"%s\" will not be matched.\n", regexps[i]);
      rtsMessage("   This version only uses whole word RegExps.\n");
    }
#endif
    if (*re_end != '\0') {
      /* re_end was set back, so we use a local copy of the string */
      size_t re_len = (size_t)(re_end - re_beg);
      char *re_nbeg = (char *)GetMem(re_len + 1, "compile_regexps:dup");
      memcpy(re_nbeg, re_beg, re_len);
      re_nbeg[re_len] = '\0';
      table[i] = new_regexp(re_nbeg, &error);
      FreeMem(re_nbeg, "compile_regexps:free");
    } else {
      table[i] = new_regexp(re_beg, &error);
    }
    if (error != 0)
    {
      regexp_abort("compile_regexps", error, regexps[i]);
    }
  }
  return table;
}

/* Undo compile_regexps(): delete the sz compiled expressions and their
 * table (when there is one) and, for sz > 0, the lexeme type array. */
static void
delete_regexps(unsigned sz, RegExp** table, LexemeType *re_lex_types)
{
  unsigned idx;

  if (table != NULL)
  {
    for (idx = 0; idx != sz; idx++)
    {
      delete_regexp(table[idx]);
    }
    FreeMem(table, "delete_regexp");
  }
  if (sz == 0) {
    return;
  }
  free_re_lex_types(re_lex_types);
}

#ifdef SHOW_RE_LISTS
int debug_nrm;
int debug_nrs;

/* Debug aid: print the pointers stored in the match and skip tables,
 * labelled with the call site `where`.  Pointers are printed with %p
 * (cast to void*), the only conversion defined for them. */
static void
show_re_lists(char *where)
{
	int rnr;
	printf("%s match_table_p:\n", where);
	for (rnr = 0; rnr < debug_nrm; rnr++) {
		printf(" %p", (void*)match_table_p[rnr]);
	}
	printf("\n");
	printf("%s skip_table_p:\n", where);
	for (rnr = 0; rnr < debug_nrs; rnr++) {
		printf(" %p", (void*)skip_table_p[rnr]);
	}
	printf("\n");
}
#endif /* SHOW_RE_LISTS */

/* Compile the match and skip regular expressions named in lex_info into
 * the module tables, together with their lexeme types. */
static void
init_regexps(void)
{
  unsigned nr_match = get_nr_matches();
  unsigned nr_skip = get_nr_skips();

  match_table_p = compile_regexps(nr_match, get_matches(),
						&match_regexp_lex_types);
  skip_table_p = compile_regexps(nr_skip, get_skips(),
						&skip_regexp_lex_types);
#ifdef SHOW_RE_LISTS
  debug_nrm = get_nr_matches();
  debug_nrs = get_nr_skips();
  show_re_lists("init_regexps");
#endif /* SHOW_RE_LISTS */
}

/* Release what init_regexps() built: first the match, then the skip
 * expressions. */
static void
end_regexps(void)
{
  unsigned nr_match = get_nr_matches();
  unsigned nr_skip = get_nr_skips();

  delete_regexps(nr_match, match_table_p, match_regexp_lex_types);
  delete_regexps(nr_skip, skip_table_p, skip_regexp_lex_types);
}

/* moved to .h: typedef enum { RegMatch, RegSkip }  RegType; */

/* Number of regular expressions of the given kind.  Any other value of
 * reg_type trips an assertion (and yields 0 when assertions are off). */
unsigned
get_nr_regexps(RegType reg_type)
{
  if (reg_type == RegMatch)
  {
    return get_nr_matches();
  }
  if (reg_type == RegSkip)
  {
    return get_nr_skips();
  }
  assert(0);
  return 0;
}

/* Compiled regular expression number i of the given kind.  No range
 * check is made on i.  Any other value of reg_type trips an assertion
 * (and yields NULL when assertions are off). */
static RegExp*
get_regexp(RegType reg_type, unsigned i)
{
#ifdef SHOW_RE_LISTS
  /* unsigned values are printed with %u; the enum is passed as int */
  printf("get_regexp(RegType=%d, unsigned=%u {should be < %u})\n",
			(int)reg_type,	i,	get_nr_regexps(reg_type));
  show_re_lists("get_regexp");
#endif /* SHOW_RE_LISTS */
  switch (reg_type)
  {
    case RegMatch:
      return match_table_p[i];
    case RegSkip:
      return skip_table_p[i];
  }
  assert(0);
  return NULL;
}

/* Lexeme type that compile_regexps() derived for regular expression
 * number id of the given kind.  No range check is made on id. */
static LexemeType
regexp_lex_type(unsigned id, RegType reg_type)
{
  if (reg_type == RegSkip)
  {
    return skip_regexp_lex_types[id];
  }
  if (reg_type == RegMatch)
  {
    return match_regexp_lex_types[id];
  }
  assert(0);	/* gcc doesn't see we'll never reach this part */
  return 0;
}

/*
//------------------------------------------------------------------------------
// Initialization and finalization of module
//------------------------------------------------------------------------------
*/

/* Initialise the lexer module for the given description.
 * Stores info for later use by the accessor functions, derives the number
 * of transition classes, and sets up the memory pools, the character
 * tables and the compiled regular expressions.  info must not be NULL and
 * is kept by reference.  Undo with end_lexer(). */
void
init_lexer(const LexInfo* info)
{
  assert(info != NULL);
  lex_info = info;
  /* the 3 extra classes are presumably grammar terminals, match and skip
   * regexps (cf. the three *_class variables below) -- TODO confirm */
  NR_classes = nr_lexicon_nonterminals + 3; /* from rtscode.h */
#if NONT_CLASSES
  gr_term_class = DECODE_TERM_OR_RE_CLASS(ENCODE_TERM(0));
  re_match_class = DECODE_TERM_OR_RE_CLASS(ENCODE_MATCH(0));
  re_skip_class = DECODE_TERM_OR_RE_CLASS(ENCODE_SKIP(0));
# ifdef DEBUG_NONT_CLASSES
  rtsMessage("%s: directors_option=%d neg_memo_option=%d\n",
                "init_lexer", directors_option, neg_memo_option);
  rtsMessage("%s: pos_memo_option=%d\n", "init_lexer", pos_memo_option);
  rtsMessage("%s: gr_term_class=%d re_match_class=%d re_skip_class=%d\n",
                "init_lexer", gr_term_class, re_match_class, re_skip_class);
# endif
#endif

  /* the pools depend on counts read through lex_info, so this must follow
   * the assignment above */
  init_memory_manager(NR_classes);
  init_char_tables();
  init_regexps();
#ifdef SHOW_LEXINFO_NRS
  show_lexinfo_nrs("end of init_lexer");
#endif
}

/* Shut the lexer module down: delete the compiled regular expressions,
 * then release the memory pools.  lex_info must still be valid, because
 * the regular expression counts are read through it. */
void
end_lexer(void)
{
  end_regexps();

  end_memory_manager();
}

/*
//------------------------------------------------------------------------------
// Initialization of states
//------------------------------------------------------------------------------
*/

/* Text shown for the end-of-sentence transition (see init_eos_transition). */
const char*	eos_text	= "<EOS>";
/* Penalty given to a freshly initialised transition (see init_transition). */
const PENALTY	penalty_unknown = 0;

/*
// Encoding functions for State.terminal field.
// See rtslex.h for actual bit encodings.
// code_nonterminal	used in State expansion (by PARAMs) after match process
// code_terminal	used in the match process
// code_regexp		used in the match process
*/

/* Encode nonterminal number id with the given arity for the
 * Transition.terminal field. */
Terminal
code_nonterminal(unsigned id, int arity)
{
  Terminal coded = ENCODE_NONT(id, arity);
  return coded;
}

/* Encode grammar terminal number id for the Transition.terminal field. */
static Terminal
code_terminal(unsigned id)
{
  Terminal coded = ENCODE_TERM(id); /* FN */
  return coded;
}

/* Encode regular expression number id of the given kind for the
 * Transition.terminal field.  Any other value of reg_type trips an
 * assertion (and yields 0 when assertions are off). */
static Terminal
code_regexp(unsigned id, RegType reg_type)
{
  if (reg_type == RegSkip)
  {
    return ENCODE_SKIP(id);
  }
  if (reg_type == RegMatch)
  {
    return ENCODE_MATCH(id);
  }
  assert(0);        /* gcc doesn't see we'll never reach this part */
  return 0;
}

/* State bit that represents the given lexeme type; 0 for a type without
 * a bit of its own. */
static StateBits
code_state_type(LexemeType lex_type)
{
  switch (lex_type)
  {
    case Prefix:      return PrefixBit;
    case Infix:       return InfixBit;
    case Suffix:      return SuffixBit;
    case SingleToken: return SingleTokenBit;
    case MultiToken:  return MultiTokenBit;
    default:          return 0x0000;
  }
}

/* Recover the lexeme type from the type bits of a transition.  Exactly
 * one of the five lexeme bits is expected to be set; anything else trips
 * an assertion (and yields -1 when assertions are off). */
LexemeType get_transition_lex_type(Transition* transition)
{
  StateBits lex_bits = transition->type & (PrefixBit | InfixBit | SuffixBit
                         | SingleTokenBit | MultiTokenBit);
  switch (lex_bits)
  {
    case PrefixBit:      return Prefix;
    case InfixBit:       return Infix;
    case SuffixBit:      return Suffix;
    case SingleTokenBit: return SingleToken;
    case MultiTokenBit:  return MultiToken;
    default:
      break;
  }
  assert(0);
  return -1;
}

/* Text used for the end-of-sentence transition. */
static const char*
get_eos_text(void)
  { return eos_text; }

/* Printable text of grammar terminal number id.  For every lexeme type
 * except SingleToken the stored text starts with one marker character,
 * which is skipped. */
const char*
get_terminal_text(unsigned id, LexemeType lex_type)
{
  const char* text = (char*)get_terminal(id);
  if (lex_type != SingleToken)
  {
    text++;   /* step over the lexeme type marker */
  }
  return text;
}

/* Return a newly allocated copy of the first len characters of src,
 * terminated with EosMark.  The caller owns the result. */
char*
copy_string(const char* src, unsigned len)
{
  char* copy = (char*)GetMem(len + 1, "copy_string");
  copy[len] = EosMark;
  strncpy(copy, src, len);
  return copy;
}

/* Signature of a predicate on a transition; non-zero means "matches". */
typedef StateBits TransTest(const Transition* transition);

/* TransTest that accepts every transition. */
static StateBits
trans_true(const Transition* transition)
  { return 1; }

/* Non-zero when the transition carries EosBit. */
static StateBits
is_eos_transition(const Transition* transition)
  { return transition->type & EosBit; }

/* Non-zero when the transition carries TermBit. */
static StateBits
is_terminal_transition(const Transition* transition)
  { return transition->type & TermBit; }

/* Non-zero when the transition carries LexBit. */
static StateBits
is_lexicon_transition(const Transition* transition)
  { return transition->type & LexBit; }

/* Non-zero when the transition carries SkipBit. */
static StateBits
is_skip_regexp_transition(const Transition* transition)
  { return transition->type & SkipBit; }

/* Non-zero when the transition carries MatchBit. */
static StateBits
is_match_regexp_transition(const Transition* transition)
  { return transition->type & MatchBit; }

/* State bit that marks a transition as coming from a regular expression
 * of the given kind.  Any other value of reg_type trips an assertion
 * (and yields 0 when assertions are off). */
static StateBits
regexp_state_bit(RegType reg_type)
{
  if (reg_type == RegSkip)
  {
    return SkipBit;
  }
  if (reg_type == RegMatch)
  {
    return MatchBit;
  }
  assert(0);        /* gcc doesn't see we'll never reach this part */
  return 0;
}

/*
// TODO: Maybe we should clear params, memos, next, and trans in alloc_state.
*/

/* Clear the destination of a transition; which field holds it depends on
 * TRANS_BY_POS (a position versus a state pointer). */
#if TRANS_BY_POS
# define INIT_TRANSITION_DEST(transition) transition->trans_dest_pos = 0
#else
# define INIT_TRANSITION_DEST(transition) transition->trans_dest_state = NULL
#endif

/* Reset the fields common to all transition kinds: no parameters, unknown
 * penalty, no destination and no successor in the list.  The terminal,
 * text and type fields are left for the kind-specific init functions.
 * With NONT_CLASSES the pos argument is not used here. */
void
init_transition(Transition* transition, unsigned pos)
{
  transition->params = NULL;
  transition->penalty = penalty_unknown;
#if NONT_CLASSES
  INIT_TRANSITION_DEST(transition);
#else
  transition->pos = pos;
  transition->neg_memos = NULL;
  transition->pos_memos = NULL;
  transition->trans_dest_state = NULL;
#endif
  transition->next = NULL;
}

/* Initialise a transition that represents end of sentence at pos.  Its
 * text is the shared eos text, so TxtFreeBit is not set. */
void
init_eos_transition(Transition* transition, unsigned pos)
{
  Terminal eos = get_eos_terminal();

  init_transition(transition, pos);
  transition->type = EosBit | code_state_type(SingleToken);
  transition->text = get_eos_text();
  transition->terminal = code_terminal(eos);
}

/* Initialise a transition for grammar terminal number id at pos.  The
 * text points into the terminal table, so TxtFreeBit is not set.  The
 * from/to arguments are not used. */
void
init_terminal_transition(Transition* transition, unsigned pos, unsigned id,
                    const unsigned char* from, const unsigned char* to,
                    LexemeType lex_type)
{
  init_transition(transition, pos);
  transition->type = TermBit | code_state_type(lex_type);
  transition->text = get_terminal_text(id, lex_type);
  transition->terminal = code_terminal(id);
}

/*
// Currently, states may be duplicated (for each parameter list)
// by add_transition_entry_params()
// called from add_state_list_entries() or expand_and_insert_transition().
// TODO: store list of parameter lists in lexicon state
// instead of duplicating states and change MATCH_LEX accordingly.
*/

/* Initialise a transition for a lexicon hit covering the input [from, to).
 * The covered text is copied, and TxtFreeBit records that the copy
 * belongs to the transition.  The terminal field temporarily holds info;
 * params, penalty and the final terminal are filled in later by
 * add_transition_entry_params(). */
void
init_lexicon_transition(Transition* transition, unsigned pos, long info,
                   const unsigned char* from, const unsigned char* to,
                   LexemeType lex_type)
{
  unsigned covered = to - from;

  init_transition(transition, pos);
  transition->type = LexBit | TxtFreeBit | code_state_type(lex_type);
  transition->text = copy_string((char*)from, covered);
  transition->terminal = info; /* changed by add_transition_entry_params() */
}

/* Initialise a transition for regular expression number id of the given
 * kind, covering the input [from, to).  The covered text is copied, and
 * TxtFreeBit records that the copy belongs to the transition. */
void
init_regexp_transition(Transition* transition, unsigned pos, unsigned id,
                  const unsigned char* from, const unsigned char* to,
                  RegType reg_type)
{
  unsigned covered = to - from;
  LexemeType lex_type;

  init_transition(transition, pos);
  transition->terminal = code_regexp(id, reg_type);
  transition->text = copy_string((char*)from, covered);
  lex_type = regexp_lex_type(id, reg_type);
  transition->type = regexp_state_bit(reg_type)
                | TxtFreeBit | code_state_type(lex_type);
}

/* Sentinel stored in a state row by mark_failure(); it is never a real
 * StateNode and must not be dereferenced. */
static StateNode* const FAILURE = (StateNode*)1;

/* Store the FAILURE sentinel at position pos of a state row. */
static void
mark_failure(StateNode** state_row, unsigned pos)
  { state_row[pos] = FAILURE; }

/* Non-zero when state is the FAILURE sentinel. */
static int
has_failure(StateNode* state)
  { return FAILURE == state; }

/* Set TokenStartBit on the transition. */
static void
mark_token_start(Transition* state)
  { state->type |= TokenStartBit; }

/* Non-zero when the transition carries TokenStartBit. */
static StateBits
is_token_start(const Transition* state)
  { return state->type & TokenStartBit; }

/* 1 when the transition does not carry TokenStartBit, otherwise 0. */
static StateBits
not_token_start(const Transition* state)
{
  return is_token_start(state) ? 0 : 1;
}

/* Flag the transition as a live token part that is a final part. */
static void
mark_live_final_part(Transition* state)
{
  state->type |= TokenPartBit;
  state->type |= FinalPartBit;
}

/* Flag the transition as a live token part that is a nonfinal part. */
static void
mark_live_nonfinal_part(Transition* state)
{
  state->type |= TokenPartBit;
  state->type |= NonfinalPartBit;
}

/* Non-zero when the transition carries TokenPartBit. */
static StateBits
is_live_token_part(const Transition* state)
  { return state->type & TokenPartBit; }

/* 1 when the transition does not carry TokenPartBit, otherwise 0. */
static StateBits
not_live_token_part(const Transition* state)
{
  return is_live_token_part(state) ? 0 : 1;
}

/*
// Final: part has transition to next token;
// Nonfinal: part has transition to next part.
// (A part may have both flags on.)
*/

/* Non-zero when the transition carries FinalPartBit. */
static StateBits
is_final_part(Transition* state)
  { return state->type & FinalPartBit; }

/* Non-zero when the transition carries NonfinalPartBit. */
static StateBits
is_nonfinal_part(Transition* state)
  { return state->type & NonfinalPartBit; }

/* transition kind, see also IS_LASTPART / HAS_PARTS_TRANSITION in rtslex.h */

/* Non-zero when a destination has been recorded on the transition, of
 * either kind (parts or token). */
static StateBits
has_transition(const Transition* state)
{
  StateBits either_kind = TransPartsBit | TransTokenBit;
  return state->type & either_kind;
}

/* Non-zero when the transition carries TransTokenBit. */
static StateBits
has_token_transition(const Transition* state)
  { return state->type & TransTokenBit; }

/* TDEST_FIELD names the Transition field that holds the destination;
 * add_parts_transition() takes both a destination state and a position
 * and forwards whichever one the current configuration stores. */
#if TRANS_BY_POS
#define TDEST_FIELD trans_dest_pos
#define add_parts_transition(src,dest,dpos)        ADD_parts_transition(src,dpos)
#else
#define TDEST_FIELD trans_dest_state
#define add_parts_transition(src,dest,dpos)        ADD_parts_transition(src,dest)
#endif

/* Record tdest as the destination of the transition and flag it with
 * TransTokenBit.  Until then the destination field holds the covered
 * length (see store_length), hence the assertion that no destination has
 * been recorded before.
 * NOTE(review): the SHOW_ZERO_DEST trace prints tdest with %p; with
 * TRANS_BY_POS the destination is a position -- confirm the specifier. */
void
ADD_transition(Transition* state, StateIndicator tdest)
{
  assert(!has_transition(state));
  state->TDEST_FIELD = tdest;
#ifdef SHOW_ZERO_DEST
  if (!tdest) {
	fprintf(stderr, "ADD_transition: tdest=0x%p for trans 0x%p\n",
						tdest,		state);
  }
#endif
  state->type |= TransTokenBit;
}

/* Record tdest as the destination of the transition and flag it with
 * TransPartsBit.  Normally reached through the add_parts_transition()
 * macro.  Asserts that no destination has been recorded before, because
 * the destination field is shared with the stored length.
 * NOTE(review): the SHOW_ZERO_DEST trace prints tdest with %p; with
 * TRANS_BY_POS the destination is a position -- confirm the specifier. */
static void
ADD_parts_transition(Transition* state, StateIndicator tdest)
{
  assert(!has_transition(state));
  state->TDEST_FIELD = tdest;
#ifdef SHOW_ZERO_DEST
  if (!tdest) {
	fprintf(stderr, "ADD_parts_transition: tdest=0x%p for trans 0x%p\n",
							tdest,		state);
  }
#endif
  state->type |= TransPartsBit;
}

/* Raw contents of the destination field.  This is only a destination
 * when has_transition() is true; otherwise the field holds the length
 * written by store_length().  No check is made here. */
static const StateIndicator
get_transition_dest(const Transition* state)
{
  return state->TDEST_FIELD;
}

/* Keep the covered input length in the destination field.  Allowed only
 * while no destination has been recorded, since the field is shared. */
static void
store_length(Transition* state, unsigned len)
{
  StateIndicator as_dest = (StateIndicator)len;
  assert(!has_transition(state));
  state->TDEST_FIELD = as_dest;
}

/* Read back the length written by store_length().  Allowed only while no
 * destination has been recorded on the transition. */
unsigned
get_length(const Transition* state)
{
  unsigned len;
  assert(!has_transition(state));
  len = (unsigned)state->TDEST_FIELD;
  return len;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static int
//        add_transition_entry_params(Transition* transition, off_t entry_idx,
//                                    const LEXICON* the_lex, off_t* nontnr)
//
// Description:
//        Fill penalty and params of the transition from lexicon entry
//        entry_idx, store the nonterminal number of that entry in *nontnr,
//        and replace transition->terminal by the encoded nonterminal
//        (number and arity).
//        Always returns 1.
//------------------------------------------------------------------------------
*/
static int
add_transition_entry_params(Transition* transition, off_t entry_idx,
                                        const LEXICON* the_lex, off_t* nontnr)
{
  int arity;

  get_params_from_entry_in_list(the_lex, entry_idx, nontnr, &arity,
                                &(transition->penalty), &(transition->params));

#ifdef NONTNR_TRACE
  /* traces the terminal value as it was before it is re-encoded below */
  rtsMessage("add_transition_entry_params: ->term %d nont %d arity %d\n",
                                    transition->terminal, *nontnr, arity);
#endif

  transition->terminal = code_nonterminal(*nontnr, arity);

  return 1;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static void
//        insert_transition(StateNode** state_row, unsigned pos, int nont_class,
//                                        Transition* transition, unsigned len)
//
// Description:
//        Insert transition in list for class nont_class, at position pos
//        in state_row, covering input at position pos with length len.
// CRUCIAL:
//        The new transition should be inserted in front of the list,
//        in order to keep the (possibly shared) tail intact.
//        
//------------------------------------------------------------------------------
*/

#if NONT_CLASSES

/*
 * Push transition onto the front of the list for nont_class at position pos,
 * creating the state node for that position on first use. The covered input
 * length len is stored in the transition itself.
 */
void
insert_transition(StateNode** states_row, unsigned pos, int nont_class,
                  Transition* transition, unsigned len)
{
  StateNode* node = states_row[pos];
  Transition** head;

  if (node == NULL)
  {
    node = alloc_statenode();
    states_row[pos] = node;
    init_statenode(node, pos);
  }
  head = &node->trans_lists[nont_class];

#ifdef SHOW_ZERO_DEST
  if (!len)
  {
    fprintf(stderr, "insert_transition storing len %d into 0x%p \"%s\"\n",
            len, transition, transition->text);
  }
#endif
  /* Only an end-of-sentence transition may cover zero characters. */
  assert(len || is_eos_transition(transition));
  store_length(transition, len);

  /* Insert in front so that an existing (possibly shared) tail is untouched. */
  transition->next = *head;
  *head = transition;

#ifdef STATE_TRACE
  {
    char * hyph_beg = transition->type & (InfixBit|SuffixBit) ? "-" : "";
    char * hyph_end = transition->type & (InfixBit|PrefixBit) ? "-" : "";
    rtsMessage("%s: pos %d st %p class %d transit %p'%s%s%s' x%lx, next %p\n",
               "insert_transition", pos, states_row[pos], nont_class,
               transition, hyph_beg, transition->text, hyph_end,
               transition->terminal, transition->next);
  }
#endif
}

/*
 * Insert one transition per lexicon entry in the entry list that starts at
 * entry_idx. The caller's transition is filled from each entry in turn; while
 * further entries exist, a copy carrying the current entry's data is inserted
 * and the original moves on to the next entry. The original is inserted last.
 * Copies have TxtFreeBit cleared in their type.
 */
static void
expand_and_insert_transition(StateNode** states_row, unsigned pos,
                off_t entry_idx, Transition* transition, unsigned len)
{
  const LEXICON* lexicon = get_lexicon();
  off_t nontnr;

  for (;;)
  {
    Transition* copy;

    if (!add_transition_entry_params(transition, entry_idx, lexicon, &nontnr))
      break;
    if (!try_advance_to_next_entry_in_list(lexicon, &entry_idx))
      break;

    copy = alloc_transition();
    *copy = *transition;        /* duplicate the whole struct */
    copy->type = transition->type & ~TxtFreeBit;
    insert_transition(states_row, pos, nontnr, copy, len);
  }
  insert_transition(states_row, pos, nontnr, transition, len);
}

#else /* no NONT_CLASSES */

/*
 * Variant without nonterminal classes: store len in the state and push the
 * state onto the front of the single list kept at state_row[pos].
 */
static void
INSERT_state(State** state_row, unsigned pos, State* state, unsigned len)
{
  State** head = &state_row[pos];

  store_length(state, len);
  state->next = *head;
  *head = state;
#ifdef STATE_TRACE
  {
    char * hyph_beg = state->type & (InfixBit|SuffixBit) ? "-" : "";
    char * hyph_end = state->type & (InfixBit|PrefixBit) ? "-" : "";
    rtsMessage(
        "insert_transition: inserted pos %d state %p'%s%s%s' x%x, next %p\n",
        pos, state, hyph_beg, state->text, hyph_end,
        state->terminal, state->next);
  }
#endif
}
/* Without classes both insertion entry points reduce to INSERT_state();
 * the class and entry index arguments are dropped. */
#define insert_transition(row,pos,nont_class,transition,len) \
                        INSERT_state(row,pos,transition,len)
#define expand_and_insert_transition(row,pos,entry_idx,transition,len) \
                        INSERT_state(row,pos,transition,len)

/*
//------------------------------------------------------------------------------
// Function
//        static void
//        add_transition_list_entries(Transition* state, LEXICON* the_lex,
//                                    int always)
//
// Description:
//        For all lexicon states in the list starting at state, find the
//        parameters from the entry and store them into the state itself.
//        If there are multiple entries, add a state for each extra entry
//        (first entry uses original state). Added states are linked in
//        directly behind the state they were copied from and have
//        TxtFreeBit cleared.
//
//        Due to the sharing of the (tail of the) parts_list, some of the
//        states are encountered twice; of course, they should be expanded
//        only once. These states can be recognized because they are
//        the only ones in the state_row list that have the (live_)token_part
//        marker. While expanding from state_row[pos], always=0 so
//        token_parts are skipped, whereas parts_row[pos] expansion
//        is called with always=1.
//
//        The list must not be empty: state is dereferenced before it is
//        tested against NULL.
//------------------------------------------------------------------------------
*/
static void
add_transition_list_entries(Transition* state, LEXICON* the_lex, int always)
{
  off_t nontnr;

  do {
    /* Remember the original successor: extra states are spliced in between. */
    Transition* next_source_state = state->next;
    if (is_lexicon_transition(state) && (always || !is_live_token_part(state)))
    {
      off_t entry_idx = state->terminal;
      /* add_transition_entry_params also fixes the state->terminal field */
      /* Argument order (lexicon first, then index pointer) matches the call
       * in the NONT_CLASSES variant expand_and_insert_transition(). */
      while (add_transition_entry_params(state, entry_idx, the_lex, &nontnr)
             && try_advance_to_next_entry_in_list(the_lex, &entry_idx))
      {
        state->next = alloc_transition();
        *(state->next) = *state;        /* copy whole struct */
        state->next->type = state->type & ~TxtFreeBit;
        state = state->next;
      }
      state->next = next_source_state;
    }
    state = next_source_state;
  } while (state != NULL);
}

/* We are already in a non-NONT_CLASSES part. This code was kept here
// just in case we might decide to fill in the entries in the NONT_CLASSES
// case at the end of lexicalisation, like we did before we had NONT_CLASSES.
*/

#if NONT_CLASSES
/*
 * Expand the lexicon entries of every non-empty transition list of a state
 * node. The lists are visited in storage order; the class counter only
 * bounds the loop and labels the trace output.
 */
static void
add_state_entries(StateNode* state, LEXICON* the_lex, int always)
{
  int slot = 0;
  int class;

  for (class = NR_classes - 1; class >= 0; class--, slot++)
  {
    Transition* list = state->trans_lists[slot];

    if (list == NULL)
      continue;
#  ifdef NONTNR_TRACE
    rtsMessage("add_state_entries: class %d\n", class);
#  endif
    add_transition_list_entries(list, the_lex, always);
  }
}
#else
#  define add_state_entries        add_transition_list_entries
#endif

/*
 * Walk all trellis->length positions and expand the lexicon entries of the
 * states found there: entries reached through the state row are expanded
 * with always=0, those reached through the parts row with always=1.
 */
static void
add_trellis_entries(Trellis* trellis, LEXICON* the_lex)
{
#if NONT_CLASSES
  StateNode** parts_row = trellis->pstates_row;
  StateNode** state_row = trellis->states_row;
#else
  State** state_row = trellis->state_row;
  State** parts_row = trellis->parts_row;
#endif
  const unsigned nr_positions = trellis->length;
  unsigned idx;

  for (idx = 0; idx < nr_positions; idx++)
  {
    if (state_row[idx] != NULL)
    {
      add_state_entries(state_row[idx], the_lex, 0);
    }
    if (parts_row[idx] != NULL)
    {
      add_state_entries(parts_row[idx], the_lex, 1);
    }
  }
}

#endif /* no NONT_CLASSES */

/*
 * Hook state behind the last element of the list whose head pointer is
 * *dest_ptr; an empty list simply gets state as its head.
 */
static void
append_to_transition_list(Transition** dest_ptr, Transition* state)
{
  Transition** link;

  /* Advance to the first NULL link in the chain. */
  for (link = dest_ptr; *link != NULL; link = &(*link)->next)
  {
  }
  *link = state;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static State*
//        add_transitions(State** state_row, const unsigned char* input, unsigned pos)
//
// Description:
//        Add transition states to each state at position pos in state_row,
//        by lexicalizing the input beyond the token in each state, unless
//        the state already has a transition to the next part in a parts-token.
//        The length of the token in each state should have been coded in the
//        trans-field of each state.
//        Also, mark the state with TokenStart, so that add_parts_transitions
//        can see the difference and set TransTokenBit i.s.o. TransPartsBit.
//
// To do: TODO
//        Add transition to next token for parts ending before terminator,
//        even if they already have a parts transition.
//------------------------------------------------------------------------------
*/

/* Forward declaration: add_transitions() below calls lexicalize(), which is
 * defined later in this file. */
static Position
lexicalize(Trellis* trellis, const unsigned char* input, unsigned pos);

/*
 * For every transition stored at position pos (in every class list when
 * NONT_CLASSES is enabled):
 *   - unless it is a live token part or an end-of-sentence transition,
 *     lexicalize the input at pos + <stored length> and attach the state
 *     found at the resulting position as the transition's destination;
 *   - unless it is a live token part, mark it as a token start.
 * The state at pos must exist (asserted).
 *
 * NOTE(review): the opening brace of the class loop lives in the #if branch
 * and its closing brace in a second #if further down, so the while loop is
 * shared by both configurations. The #else branch declares states_row as
 * StateNode** while reading trellis->state_row, which is State** in
 * add_trellis_entries() -- confirm that the branch without NONT_CLASSES
 * still builds.
 */
static void
add_transitions(Trellis* trellis, const unsigned char* input, unsigned pos)
{
#if NONT_CLASSES
  StateNode** states_row = trellis->states_row;
  StateNode* st_node = states_row[pos];
  Transition* transition;
  int class;

  assert(st_node != NULL);
  for (class = NR_classes - 1; class >= 0; class--)
  {
    transition = st_node->trans_lists[class];
#else
    StateNode** states_row = trellis->state_row;
    State* transition = states_row[pos];
    assert(transition != NULL);
#endif
    while (transition != NULL)
    {
      /* For parts_tokens, backward_mark_partstate_sets() calls lexicalize ()
      // after the last part and marks the first part as token_start.
      // Their transitions are filled in by fix_parts_transitions()
      // after the whole sentence has been lexed.
      */
      if (!is_live_token_part(transition) && !is_eos_transition(transition))
      {
        /* The destination field still holds the token length here. */
        unsigned len = get_length(transition);
        Position dest_pos = lexicalize(trellis, input, pos + len);
#ifdef SHOW_ZERO_DEST
	if (!states_row[dest_pos]) {
		fprintf(stderr, "add_transitions: add_tr(0x%p, 0, %d)\n",
							transition, dest_pos);
	}
#endif
        add_transition(transition, states_row[dest_pos], dest_pos);
      }
      if (!is_live_token_part(transition))
      {
        mark_token_start(transition); /* as opposed to interpart start */
      }
      /* marking of first part of a parts_token is done in
       * backward_mark_partstate_sets */
      transition = transition->next;
    } /* while */
#if NONT_CLASSES
  } /* for class */
#endif
}

/*
//------------------------------------------------------------------------------
// Function:
//        static int
//        skip_unknown_token(StateNode** state_row,
//                           const unsigned char* input, unsigned* pos_p)
//
// Description:
//        Skip input characters starting at the position pointed to by pos_p
//        until the next word terminator. However, skip at least one character.
//        For each skipped character, the corresponding position in state_row
//        is set to failure, and the position pointed to by pos_p is advanced.
//        Function log_unknown_token() is called for reporting the skipped string.
//------------------------------------------------------------------------------
*/

static void
skip_unknown_token(StateNode** state_row, const unsigned char* input, unsigned* pos_p)
{
  const unsigned start = *pos_p;
  unsigned cur = start;
  unsigned skipped = 0;

  assert(!is_eos(input[cur]));
  /* Consume at least one character, then continue up to the terminator. */
  for (;;)
  {
    mark_failure(state_row, cur++);
    skipped++;
    if (is_terminator(input[cur]))
      break;
  }
  /* Report the skipped text by start position, start address and length. */
  log_unknown_token(start, input + start, skipped);
  *pos_p = cur;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static void
//        do_skip_invisible_char(StateNode** state_row,
//                            const unsigned char* input, unsigned* pos_p)
//
// Description:
//        Skip one invisible character in input starting at the position
//        pointed to by pos_p. The corresponding position in state_row is
//        set to failure, and the position pointed to by pos_p is advanced.
//------------------------------------------------------------------------------
*/

static void
do_skip_invisible_char(StateNode** state_row,
                        const unsigned char* input, unsigned* pos_p)
{
  unsigned cur = *pos_p;

  /* The caller must already have established that this char is invisible. */
  assert(is_invisible(input[cur]));
  mark_failure(state_row, cur++);
  *pos_p = cur;
}

/*
 * Advance *pos_p past every consecutive position of state_row for which
 * has_failure() holds; *pos_p is unchanged if the current position has none.
 */
static void
may_skip_failures(StateNode** state_row, unsigned* pos_p)
{
  unsigned cur;

  for (cur = *pos_p; has_failure(state_row[cur]); cur++)
  {
  }
  *pos_p = cur;
}

#if 0 /*F1*/
/*
//------------------------------------------------------------------------------
// Function:
//        static void
//        skip_blanks(StateNode** state_row,
                        const unsigned char* input, unsigned* pos_p)
//
// Description:
//        Skip zero or more blanks in input starting at the position 
//        pointed to by pos_p. For each skipped character, the corresponding
//        position in state_row is set to failure, and the position pointed
//        to by pos_p is advanced.
//------------------------------------------------------------------------------
*/

/* Disabled code: the enclosing "#if 0 F1" conditional compiles this out.
 * The active (#else) variant advances over positions that hold a blank
 * (marking each as failure) or already carry a failure mark, and stores the
 * first position that is neither in *pos_p. */
static void
skip_blanks_or_failures(StateNode** state_row,
                        const unsigned char* input, unsigned* pos_p)
{
  unsigned pos = *pos_p;
#if 0
  while (is_blank(input[pos]) || has_failure(state_row[pos]))
    mark_failure(state_row, pos++);
  *pos_p = pos;
#else
  while (1)
  {
    if (is_blank(input[pos]))
    {
      mark_failure(state_row, pos);
    }
    else if (!has_failure(state_row[pos]))
    {
      /* neither blank nor failure: this is where scanning resumes */
      *pos_p = pos;
      return;
    }
    pos++;
  }
#endif
}
#endif /*F1 0 */

/*
//------------------------------------------------------------------------------
// Function:
//        static int
//        check_terminator(SeparatorType sep_type, unsigned char c)
//
// Description:
//        Check whether c is allowed in the context of sep_type.
//        If sep_type is SepRequired, then c must be a terminator,
//        //else if sep_type is SepNoBlank then c must not be a blank,
//        else, don't care.
//
// Return value:
//        True if c allowed with sep_type, or false else.
//------------------------------------------------------------------------------
*/

static int
check_terminator(SeparatorType sep_type, unsigned char c)
{
  /* No constraint on the following character. */
  if (sep_type == SepDontCare)
    return 1;

  /* The following character has to be a terminator. */
  if (sep_type == SepRequired)
    return is_terminator(c);

  /* SepNoBlank is no longer supported; any other value is a caller error. */
  assert(0); /* not reached */
  return 0;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static const unsigned char*
//        approx_match_lexeme(const unsigned char* input, const unsigned char* lexeme,
//                            LexemeType lex_type, SeparatorType sep_type)
//
// Description:
//        Find approximate match of lexeme of type lex_type with any
//        prefix of the input. If sep_type is SepRequired, then the matched text
//        must be followed by a terminator (in multi-tokens and single-tokens).
//        //Else, if SepRequired is SepNoBlank, then the matched text must not
//        //be followed by a blank (in parts-tokens).
//        A space in the terminal matches one or more blanks in the input.
//        If a character cannot be matched literally, it is tried to match
//        its translation.
//
// Return value:
//        A pointer to the first unmatched character, if a non-empty prefix
//        could be matched, or NULL else.
//
// To do:
//        Branch and bound for minimal edit distance.
//------------------------------------------------------------------------------
*/

static const unsigned char*
approx_match_lexeme(const unsigned char* input, const unsigned char* lexeme,
                    LexemeType lex_type, SeparatorType sep_type)
{
  const unsigned char mark = get_lex_mark(lex_type);
  unsigned char want;

  /* A lexeme of a marked type has to start with that marker. */
  if (mark != EmptyMark && *lexeme++ != mark)
    return NULL;

  for (want = *lexeme++; !is_eos(want); want = *lexeme++)
  {
    const unsigned char got = *input;

    if (is_space(want))
    {
      /* One space in the lexeme swallows a run of one or more blanks. */
      if (!is_blank(got))
        return NULL;
      do
        input++;
      while (is_blank(*input));
    }
    else if (want == got || want == translate(got))
    {
      /* Literal match, or match after translating the input character. */
      input++;
    }
    else
    {
      return NULL;
    }
  }

  return check_terminator(sep_type, *input) ? input : NULL;
}

/*
//------------------------------------------------------------------------------
// Function:
//        int bin_search(unsigned char c, const unsigned char* p, unsigned size)
//
// Description:
//        Binary search for c in the ascending sorted array p of size elements.
//        An empty array (size == 0) is allowed: p is then never dereferenced
//        and the search fails.
//
// Return value:
//        Index of c in array, or -1 else.
//------------------------------------------------------------------------------
*/

static int
bin_search(unsigned char c, const unsigned char* p, unsigned size)
{
  unsigned low = 0;
  unsigned high = size;

  /* Invariant: if c is present, its index lies in [low, high). */
  while (low < high)
  {
    /* Midpoint computed without forming low + high, which could wrap. */
    unsigned mid = low + (high - low) / 2;
    unsigned char d = p[mid];
    if (c < d)
      high = mid;
    else if (c > d)
      low = mid + 1;
    else
      return (int)mid;
  }
  return -1;
}

/*
//------------------------------------------------------------------------------
// Type:
//        TrieFrame, TrieData
//
// Description:
//        TrieData contains the parameters of the trie search process,
//        and a stack with TrieFrame stack frames representing states
//        of the search process. Initially, one new frame is pushed onto
//        the stack, and each time the search process forks, another frame
//        is pushed. If a search branch fails, or is exhausted, its frame
//        is popped from the stack. If searching is successful, the state
//        of the searching process is stored in the frame, and the results
//        are stored in TrieData.
//
//	2000-05-01 FN (adding forking if translation available)
//	Since we can no longer rely on the last char in the lexeme_buffer
//	(it may be the untranslated char), the correct char now lives
//	in the frame, and the position (last_pos, inside the lex_buf)
//	where it should go is put there too, instead of lex_end
//	(which is derived after restoring).
//------------------------------------------------------------------------------
*/

/* One saved state of the trie search. Frames are written by save_frame()
 * and read back by restore_frame(); approx_match_trie() stores
 * lex_last_char at lex_last_pos when it resumes from a frame. */
typedef struct
{
    unsigned		offset;		/* next trie offset */
    const unsigned char*	input;		/* next input position */
    unsigned char*		lex_last_pos;	/* points to last char of lexeme */
/*unused  unsigned char	next_char;	/ * next char to be inserted */
    unsigned char		lex_last_char;	/* char to put into last_pos */
} TrieFrame;

/* Record a complete search state (trie offset, input position, and the
 * pending lexeme character together with its buffer slot) in frame fp. */
static void
save_frame(TrieFrame* fp, unsigned offset, const unsigned char* input,
	   unsigned char* lex_last_pos, unsigned char lex_last_char)
{
    fp->lex_last_char = lex_last_char;
    fp->lex_last_pos = lex_last_pos;
    fp->input = input;
    fp->offset = offset;
}

/* Copy the four fields of frame fp out through the given pointers; the
 * frame itself is left unchanged. */
static void
restore_frame(const TrieFrame* fp, unsigned* p_offset, const unsigned char** p_input,
	      unsigned char** p_lex_last_pos, unsigned char *p_lex_last_char)
{
    *p_lex_last_char = fp->lex_last_char;
    *p_lex_last_pos = fp->lex_last_pos;
    *p_input = fp->input;
    *p_offset = fp->offset;
}

/* MaxLexLen bounds both the depth of the frame stack and the size of the
 * lexeme buffer (which gets one extra leading byte). */
enum { MaxLexLen = 1024 };

/* Parameters, result slots and frame stack of one trie search; set up by
 * init_trie_data() and advanced by approx_match_trie().
 * NOTE(review): info is unsigned here, while mark_prefix() receives the
 * value as long and match_lexicon_terminals() reads it back into a long --
 * confirm the value range fits. */
typedef struct
{
    SeparatorType sep_type;		/* lexeme terminator type */
    unsigned char	lex_mark;		/* marker indicating lexeme type */
    unsigned char*	lex_end;		/* pointer beyond last lexeme */
    unsigned	info;			/* info of last lexeme */
    TrieFrame*	sp;			/* pointer to next free frame; */
    TrieFrame	stack[MaxLexLen];	/* frame stack */
    unsigned char	lex_buf[1 + MaxLexLen];	/* holds text of matched lexeme */
    unsigned char	*lex_begin;		/* points to matched lexeme */
} TrieData;

/* Return the most recently pushed frame, or NULL when the stack is empty. */
static TrieFrame*
get_top_frame(const TrieData* data)
{
  if (data->sp == data->stack)
    return NULL;
  return data->sp - 1;
}

/* Claim the next free frame and return it. The frame is not initialized
 * here; overflow of the fixed-size stack is only caught by the assert. */
static TrieFrame*
push_frame(TrieData* data)
{
  TrieFrame* slot = data->sp;

  assert(slot < &data->stack[MaxLexLen]);
  data->sp = slot + 1;
  return slot;
}

/* Discard the top frame; the stack must not be empty (asserted). */
static void
pop_frame(TrieData* data)
{
  assert(data->sp > data->stack);
  data->sp -= 1;
}

/*
 * Prepare data for a fresh search over input: one frame is placed on the
 * stack (trie offset 0, the given input, a '\0' destined for lex_buf[0]),
 * and the matched lexeme text is set to start at lex_buf[1].
 */
static void
init_trie_data(TrieData* data, const unsigned char* input, 
               LexemeType lex_type, SeparatorType sep_type)
{
  TrieFrame* bottom = &data->stack[0];

  data->sep_type = sep_type;
  data->lex_mark = get_lex_mark(lex_type);
  data->lex_begin = &data->lex_buf[1];
  save_frame(bottom, 0, input, &data->lex_buf[0], '\0');
  data->sp = bottom + 1;
}

/* Remember the most recent match: its info value and the end of its text
 * in the lexeme buffer. */
static void
mark_prefix(TrieData* data, long info, unsigned char* lex_end)
{
  data->lex_end = lex_end;
  data->info = info;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static const unsigned char*
//        approx_match_trie(const Trie* trie, TrieData* data)
//
// Description:
//        Search next lexeme in trie, beginning in the state that is stored
//        on top of the search stack. If a non-empty prefix can be matched,
//        its related info is stored in data. At the first invocation, any
//        marker indicating a lexeme type in frame is matched, and set 
//        //F1 to empty. A blank in the trie matches zero or more blanks in the
//        to empty. A blank in the trie matches one or more blanks in the
//        input. If a character cannot be matched literally, it is tried
//        to match its translation. If both a literal character and its
//        translation can be matched, the search process is forked by pushing
//        a new frame representing the second branch on the stack.
//        If a prefix has been matched, it is checked whether it should be
//        followed by a terminator. While matching input, the text of the
//        matched lexeme in the trie is copied into the buffer in data.
//
// Return value:
//        A pointer to the first unmatched character, if a non-empty prefix
//        could be matched, or NULL else.
//
// To do:
//        Branch and bound for minimal edit distance;
// Done: always fork search. (2000-05-01)
//	(FN: I hope "in case of translation" is meant by "always")
//------------------------------------------------------------------------------
*/

/* Alignment unit used by align(): the size of a pointer. */
enum { WORD_SZ = sizeof(Trie*) };

/*
 * Round p up to the next address that is a multiple of WORD_SZ; an already
 * aligned p is returned unchanged.
 *
 * The address is converted to size_t rather than unsigned, so that the
 * pointer is not truncated where pointers are wider than unsigned int.
 */
static const unsigned char*
align(const unsigned char* p)
{
  size_t mod = (size_t)p % WORD_SZ;
  return mod ? p + (WORD_SZ - mod) : p;
}

/* Layout of the two header bytes read at the start of each trie node:
 * byte 0 is the length of the compressed path, byte 1 the number of
 * branch characters that follow the path. */
enum { TRIE_PATH_IDX = 0, TRIE_SIZE_IDX = 1, TRIE_HEADER_SZ = 2 };

/*
 * Resumable trie search. Each call continues from the frame on top of the
 * stack in data and returns the input position just behind the next match,
 * after recording the match's info and lexeme end through mark_prefix().
 * A branch that fails pops its frame and the search retries from the new
 * top frame; NULL is returned once the stack is empty.
 *
 * NOTE(review): no bound is checked on lex_end while lexeme characters are
 * copied into data->lex_buf -- presumably trie lexemes are shorter than
 * MaxLexLen; confirm.
 */
static const unsigned char*
approx_match_trie(const Trie* trie, TrieData* data)
/* returns NULL if there are no (more) matches by any remaining frame
*/
{
  unsigned offset;
  const unsigned char* input;
  unsigned char* lex_end;
  unsigned char lex_mark;
  SeparatorType sep_type;
  TrieFrame* fp;

try_from_top_frame:
  fp = get_top_frame(data);
  if (fp == NULL) {
	return NULL;
  }
  {
	unsigned char last_ch;

	/* frame actually contains lex_last_pos, not lex_end */
	restore_frame(fp, &offset, &input, &lex_end, &last_ch);
	/* put the saved char in its slot; lex_end now points just past it */
	*lex_end++ = last_ch;
  }
  lex_mark = data->lex_mark;
  sep_type = data->sep_type;

  while (1)
  {
    const unsigned char* success;
    unsigned char next_char;
    int index;
        /*
        // Move to next trie node, and read header info
        */
    const unsigned char* node = (const unsigned char*)trie + offset;
    unsigned path = node[TRIE_PATH_IDX]; /* length of compressed path */
    unsigned size = node[TRIE_SIZE_IDX]; /* number of subtries */
    node += TRIE_HEADER_SZ;
          /*
        // Match input with characters of prefix path in trie node.
        // If lex_mark is not empty, match it first.
        */
    if (path > 0) {
      if (lex_mark != EmptyMark)
      {
        if (*node++ != lex_mark)
        {
          pop_frame(data);
          goto try_from_top_frame; /* return NULL; */
        }
        /* NOTE(review): a marker matched here is not copied into lex_buf,
         * whereas a marker matched through a branch (below) is copied, and
         * match_lexicon_terminals() asserts that the buffer starts with the
         * marker -- verify whether the first node can carry a path. */
        data->lex_mark = lex_mark = EmptyMark;
        path--;
      }
      while (path--) {
	unsigned char c = *node++;
	next_char = *input;
	if (is_space(c)) {
	    if (!is_blank(next_char)) {
		pop_frame(data);
		goto try_from_top_frame; /* return NULL; */
	    }
	    input++;
	    while (is_blank(*input)) {
		input++;
	    }
	} else if (c == next_char) {
	    /* FN: in path, so there can't be a translation as well */
	    input++;
	} else if (c == translate(next_char)) {
	    input++;
	} else {
	    pop_frame(data);
	    goto try_from_top_frame; /* return NULL; */
	}
	*lex_end++ = c;
      } /* while (path--) */
    } /* if (path > 0) */
        /*
        // If lex_mark is still not empty, match it first.
        */
    success = NULL;
    if (lex_mark != EmptyMark)
    {
      next_char = lex_mark;
      data->lex_mark = lex_mark = EmptyMark;
    }
    else
    {
        /*
        // If we have a prefix, check terminator. If success,
        // set position and save node info.
        */
      if (is_eos(*node))
      {
        if (check_terminator(sep_type, *input))
        {
          /* the info word is read from the first aligned slot behind the
           * branch characters */
          long info = *(long*)align(node + size);
          mark_prefix(data, info, lex_end);
          success = input;
        }
      }
        /*
        // Get next input character. If end-of-string, return success, if any.
        // Else, if blank, skip blanks, and try to match space.
        */
      next_char = *input++;
      if (is_eos(next_char))
      {
        pop_frame(data);
        /* return success; */
	if (success != NULL) {
	    return success;
	} else {
	    goto try_from_top_frame;
	}
      }
      else if (is_blank(next_char))
      {
        while (is_blank(*input))
          input++;
        next_char = SpaceMark;
      }
    }
        /*
        // Try to match the next input character in the branches.
        */
    index = bin_search(next_char, node, size);
    if (next_char != translate(next_char)) {
	int tndex = bin_search(translate(next_char), node, size);
	if (index < 0 && tndex < 0) {
	    /* neither path available */
	    pop_frame(data);
	    /* return success; */
	    if (success != NULL) {
		return success;
	    } else {
		goto try_from_top_frame;
	    }
	} else if (index < 0) {
	    /* only translated path available */
	    next_char = translate(next_char);
	    index = tndex;
	} else if (tndex >= 0) {
	    /* both paths available */
	    /* The current frame is overwritten with the translated
	     * alternative, to be resumed later; a new frame is pushed for
	     * the literal branch that is followed now. */
	    unsigned tffset = *(long*)align(node + size + WORD_SZ * tndex);
	    save_frame(fp, tffset, input, lex_end, translate(next_char));
	    fp = push_frame(data);
	} else {
	    /* only untranslated path available */
	    /* no action needed */
	}
    } else if (index < 0) {
	pop_frame(data);
	/* return success; */
	if (success != NULL) {
	    return success;
	} else {
	    goto try_from_top_frame;
	}
    }
    /*Ftrans *lex_end++ = next_char;	** moved below */
        /*
        // Fetch offset of next trie node. If we have found
        // an info, remember the current offset and input position,
        // and return the last prefix.
        */
    offset = *(long*)align(node + size + WORD_SZ * index);
    if (success != NULL)
    {
      /* next_char is not written yet: the frame carries it, and it is
       * stored when the search resumes from this frame */
      save_frame(fp, offset, input, lex_end, next_char);
      return success;
    }
    *lex_end++ = next_char;	/* Ftrans moved here */
  }
}

/*
//------------------------------------------------------------------------------
// Function:
//        static void
//        init_regexp_limit(TokenLimit* token, const unsigned char* str)
//
//        static void
//        init_list_limit(TokenLimit* token, const unsigned char* str)
//
//        static void
//        delimit_token(const TokenLimit* limit)
//
//        static void
//        restore_token(TokenLimit* limit)
//
// Description:
//        init_regexp_limit() finds boundary of str for regexp match.
//        init_list_limit() finds boundary of str for list match.
//        delimit_token() delimits token with end-of-string.
//        restore_token() restores the boundary of the token.
//------------------------------------------------------------------------------
*/

/* Boundary of the token that may be matched. delimit_token() overwrites the
 * character at pos with EosMark after copying it to save; restore_token()
 * writes it back. */
typedef struct
{
  unsigned char*        pos;    /* location of the boundary character */
  unsigned char        save;    /* original character found at pos */
} TokenLimit;

/*
 * Put the token limit at the end-of-string marker of str. Blanks do not end
 * the scan. Only limit->pos is set; limit->save is filled in by
 * delimit_token().
 */
static void
init_regexp_limit(TokenLimit* limit, const unsigned char* str)
{
  const unsigned char* end = str;

  while (!is_eos(*end))
    end++;
  limit->pos = (unsigned char*)end;
}
/* Put the token limit at an explicitly given position; str is not used. */
#define init_regexp_limit_to(limit, str, end) \
	((limit)->pos = (unsigned char*)(end))

#if USING_MATCH_LIST
/* Placeholder for the list-match limit: the whole body is disabled with
 * "#if 0", so the function does nothing even when USING_MATCH_LIST is set.
 * The disabled text is incomplete (is_delimiter() lacks its second
 * argument) and would not compile if enabled as is. */
static void
init_list_limit(TokenLimit* limit, const unsigned char* str)
{
#if 0
  unsigned char c;
  while (!is_eos(c = *str) && !is_delimiter(c, ))
    str++;
  limit->pos = (unsigned char*)str;
#endif
}
#endif

/* Cut the input at the limit: remember the character found there and put
 * an end-of-string marker in its place. Undone by restore_token(). */
static void
delimit_token(TokenLimit* limit)
{
  unsigned char* boundary = limit->pos;
  const unsigned char original = *boundary;

  limit->save = original;
  *boundary = EosMark;
}

/* Undo delimit_token(): write the remembered character back at the limit. */
static void
restore_token(TokenLimit* limit)
{
  unsigned char* boundary = limit->pos;
  const unsigned char original = limit->save;

  *boundary = original;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static const unsigned char*
//        match_regexp_token(const unsigned char* str, TokenLimit* limit,
//                           const RegExp* regexp, SeparatorType sep_type)
//
// Description:
//        Match regexp with non-empty prefix of str. The prefix should
//        be followed by a character acceptable to sep_type. TokenLimit limit
//        is used for delimiting and restoring the token that can be matched.
//
// Return value:
//        Pointer to first unmatched char of non-empty prefix of str
//        matching regexp, if any, or NULL else.
//------------------------------------------------------------------------------
*/

static const unsigned char*
match_regexp_token(const unsigned char* input, TokenLimit* limit,
                   const RegExp* regexp, SeparatorType sep_type)
{
  const unsigned char* end;

  /* The input is cut at the limit only for the duration of the match. */
  delimit_token(limit);
  end = (const unsigned char*)match_regexp((const char*)input, (RegExp*)regexp);
  restore_token(limit);

  if (end == NULL)
    return NULL;
  /* Reject empty matches and matches followed by an unacceptable char. */
  if (end == input || !check_terminator(sep_type, *end))
    return NULL;
  return end;
}

/*
 * Allocate a transition for regexp number id matching input [from, to) and
 * insert it at position pos in the list for the given class.
 */
static void
insert_new_regexp_transition(StateNode** state_row, unsigned pos, int class,
				unsigned id, const unsigned char* from,
				const unsigned char* to, RegType reg_type)
{
        const unsigned length = to - from;
        Transition* transition = alloc_transition();

        init_regexp_transition(transition, pos, id, from, to, reg_type);
        insert_transition(state_row, pos, class, transition, length);
}

/*
//------------------------------------------------------------------------------
// Function:
//      static unsigned
//        match_regexps(RegType reg_type, State** state_row,
//                      const unsigned char* input, unsigned pos)
//
// Description:
//        Match all regexps at position pos in input. For each match,
//        a new state is created and inserted at position pos in state_row.
//        The boundary of the token to be matched is stored in TokenLimit.
//
// Return value:
//        The number of matched regexps.
//------------------------------------------------------------------------------
*/

static unsigned
match_regexps(RegType reg_type, StateNode** state_row,
#if NONT_CLASSES
                int nont_class,
#endif
		const unsigned char* input, unsigned pos,
		SeparatorType sep_type, int do_all_lex_types)
{
  const unsigned char* from = input + pos;
  unsigned nr_regexps = get_nr_regexps(reg_type);
  unsigned nr_matches = 0;
  unsigned i;
  TokenLimit limit;

  init_regexp_limit(&limit, from);
  for (i = 0; i < nr_regexps; i++)
  {
    const RegExp* regexp = get_regexp(reg_type, i);
    const unsigned char* to;

    if (regexp == NULL)
      continue;
    /* Unless all lexeme types are wanted, only single tokens qualify. */
    if (!do_all_lex_types && regexp_lex_type(i, reg_type) != SingleToken)
      continue;

    /* NOTE: if SepRequired, we may not find an existing match:
     * if the longest match is not followed by a terminator,
     * the match fails; there may however be a shorter match
     * which _is_ followed by a terminator
     */
    to = match_regexp_token(from, &limit, regexp, sep_type);
    if (to == NULL)
      continue;

    insert_new_regexp_transition(state_row, pos, nont_class,
                                 i, from, to, reg_type);
#if ALSO_SHORTER_REGEXPS
    /* Retry with the limit moved ever further to the left. */
    while (--to > from) {
      TokenLimit tlim;
      if (sep_type == SepRequired) {
        while (!is_terminator(*to) && (--to > from)) ;
        if (to <= from) {
          break;
        }
      }
      init_regexp_limit_to(&tlim, from, to);

      /* see NOTE above */
      to = match_regexp_token(from, &tlim, regexp, sep_type);
      if (to == NULL) {
        break;
      } else {
        insert_new_regexp_transition(state_row, pos, nont_class,
                                     i, from, to, reg_type);
      }
    }
#endif /* ALSO_SHORTER_REGEXPS */
    nr_matches++; /* only count once per RE */
  }
  return nr_matches;
}

/* Try all RegMatch regexps at pos; true if at least one of them matched. */
static int
match_regexp_matches(StateNode** state_row, const unsigned char* input,
		unsigned pos, SeparatorType sep_type, int do_all_lex_types)
{
  const unsigned hits = match_regexps(RegMatch, state_row,
#if NONT_CLASSES
                re_match_class,
#endif
                input, pos, sep_type, do_all_lex_types);

  return hits > 0;
}

/* Try all RegSkip regexps at pos; true if at least one of them matched. */
static int
match_regexp_skips(StateNode** state_row, const unsigned char* input,
		unsigned pos, SeparatorType sep_type, int do_all_lex_types)
{
  const unsigned hits = match_regexps(RegSkip, state_row,
#if NONT_CLASSES
                re_skip_class,
#endif
                input, pos, sep_type, do_all_lex_types);

  return hits > 0;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static int
//        match_lexicon_terminals(StateNode** state_row, const unsigned char* input,
//                                unsigned pos,
//                                LexemeType lex_type, SeparatorType sep_type)
//
// Description:
//        Match all lexicon terminals at position pos in input with lex_type
//        and sep_type. For each match, a new state is created and inserted
//        at position pos in state_row.
//        Lexicon terminals are identified by approx_match_trie(),
//        saving each info, the matched lexeme and the next search position
//        in TrieFrame frame.
//
// Return value:
//        The number of matched lexicon terminals.
//------------------------------------------------------------------------------
*/

static int
match_lexicon_terminals(StateNode** state_row, const unsigned char* input,
                        unsigned pos,
                        LexemeType lex_type, SeparatorType sep_type)
{
  const Trie* lexicon = get_trie();
  const unsigned char* start;
  const unsigned char* match_end;
  TrieData search;
  unsigned found = 0;

  /* Without a lexicon trie there is nothing to match. */
  if (lexicon == NULL)
  {
    return 0;
  }

  start = input + pos;
  init_trie_data(&search, start, lex_type, sep_type);

  /* Each successful call yields one more match; the search state is
   * carried along in 'search' until approx_match_trie() returns NULL.
   */
  for (match_end = approx_match_trie(lexicon, &search);
       match_end != NULL;
       match_end = approx_match_trie(lexicon, &search))
  {
    const unsigned char* lexeme_first = search.lex_begin;
    const unsigned char* lexeme_last = search.lex_end;
    long entry = search.info;
    Transition* fresh = alloc_transition();
    /* number of input characters consumed, not the lexeme's own length */
    unsigned consumed = match_end - start;

    if (lex_type != SingleToken)
    {
      /* The stored lexeme starts with a control character marking its
       * lexeme type; step over it. The consumed input length is unchanged.
       */
      assert(*lexeme_first == get_lex_mark(lex_type));
      lexeme_first++;
    }

    init_lexicon_transition(fresh, pos,
                            entry, lexeme_first, lexeme_last, lex_type);
    expand_and_insert_transition(state_row, pos, entry, fresh, consumed);
    found++;
  }
  return found;
}

/*
//------------------------------------------------------------------------------
// Function:
//      static int
//        match_grammar_terminals(StateNode** state_row, const unsigned char* input,
//                                unsigned pos,
//                                LexemeType lex_type, SeparatorType sep_type)
//
// Description:
//        Match all grammar terminals at position pos in input with lex_type
//        and sep_type. For each match, a new state is created and inserted
//        at position pos in state_row.
//
// Return value:
//        The number of matched grammar terminals.
//------------------------------------------------------------------------------
*/

static int
match_grammar_terminals(StateNode** state_row, const unsigned char* input,
                        unsigned pos,
                        LexemeType lex_type, SeparatorType sep_type)
{
  unsigned found = 0;
  unsigned terminal_count = get_nr_terminals();
  const unsigned char* start = input + pos;
  unsigned idx = 0;

  /* Try every grammar terminal against the input at 'start'. */
  while (idx < terminal_count)
  {
    const unsigned char* terminal_text = get_terminal(idx);
    const unsigned char* match_end
      = approx_match_lexeme(start, terminal_text, lex_type, sep_type);

    if (match_end != NULL)
    {
      /* One transition per matching terminal, filed under gr_term_class. */
      Transition* fresh = alloc_transition();
      unsigned consumed = match_end - start;

      init_terminal_transition(fresh, pos, idx, start, match_end, lex_type);
      insert_transition(state_row, pos, gr_term_class, fresh, consumed);
      found++;
    }
    idx++;
  }
  return found;
}

/*
// Match both kinds of terminals at position pos: first the grammar
// terminals, then the lexicon terminals, with the same lexeme type and
// separator type. Returns the sum of both match counts.
*/
static unsigned
match_terminals(StateNode** state_row, const unsigned char* input, unsigned pos,
                LexemeType lex_t, SeparatorType sep_t)
{
  unsigned total = match_grammar_terminals(state_row, input, pos, lex_t, sep_t);

  total += match_lexicon_terminals(state_row, input, pos, lex_t, sep_t);
  return total;
}

#if MATCH_TOKENS_WITH_PREFERENCE || defined(PARTS_LEX_PRE2000)
/*
// Match MultiToken terminals (separator required) at position pos.
// Returns 1 if at least one was matched, 0 otherwise.
*/
static int
match_multi_tokens(StateNode** state_row, const unsigned char* input, unsigned pos)
{
  unsigned found
    = match_terminals(state_row, input, pos, MultiToken, SepRequired);

  return found > 0;
}
#endif

#if MATCH_TOKENS_WITH_PREFERENCE
/*
// Match SingleToken terminals (separator required) at position pos.
// Returns 1 if at least one was matched, 0 otherwise.
// Not used, since single-tokens are also covered by parts-tokens.
*/
static int
match_single_tokens(StateNode** state_row, const unsigned char* input, unsigned pos)
{
  unsigned found
    = match_terminals(state_row, input, pos, SingleToken, SepRequired);

  return found > 0;
}
#endif

/*
//------------------------------------------------------------------------------
// Type:
//        PartState
//
// Description:
//        PartState represents the state of the finite state machine for
//        recognizing parts-tokens. The state contains bits for indicating
//        the token types that have been recognized. A new state is obtained
//        by or-ing the previous state with a StateBits.
//        A PartState is final if its associated token contains at least
//        a single token or an infix. Otherwise, the token should be extended
//        with other parts.
//------------------------------------------------------------------------------
*/

typedef StateBits        PartState;

/* The part state before any part has been seen: no bits set. */
static PartState
get_initial_part_state(void)
{
  const PartState no_parts_seen = 0x0000;

  return no_parts_seen;
}

#ifndef PARTS_LEX_PRE2000
/* since aug. 2000, the part_state reflects just the part_bits of the
** last part seen, so we can tell whether the last part is a valid final part
** (the pre-2000 version was an OR of all part_bits seen)
*/

/*
// The part state after taking 'state' is just the part bits of that
// transition; the previous part_state does not contribute.
*/
static PartState
get_next_part_state(Transition* state, PartState part_state)
{
    /* the masking was missing in pre2000 */
    PartState last_part_bits = state->type & PartBitsMask;

    (void) part_state;        /* intentionally ignored in this variant */
    return last_part_bits;
}

/*
// A part state is final when the last part seen was a suffix,
// a single token or a multi token.
// Returns the matching bits (non-zero) if final, 0 otherwise.
*/
static int
is_final_part_state(PartState part_state)
{
    int final_bits
      = part_state & (MultiTokenBit | SingleTokenBit | SuffixBit);

    return final_bits;
}

/*
// Decide whether the part carried by 'state' may follow a token that is
// currently in cur_part_state:
//   - an infix or suffix needs something before it, i.e. is refused only
//     in the initial part state;
//   - a prefix, single token or multi token is refused once the previous
//     part was a suffix, single token or multi token.
// Returns 1 if the transition may be taken, 0 otherwise.
// Any other part-bit combination is a programming error (assert).
*/
static int
can_take_part_state_trans(PartState cur_part_state, Transition* state)
{
    int allowed = 0;

    switch (state->type & PartBitsMask) {
	case PrefixBit:
	case SingleTokenBit:
	case MultiTokenBit:
	    allowed = (cur_part_state
		       & (SuffixBit | SingleTokenBit | MultiTokenBit)) == 0;
	    break;
	case InfixBit:
	case SuffixBit:
	    allowed = (cur_part_state != get_initial_part_state());
	    break;
	default:
	    assert(0);
	    break;
    }
    return allowed;
}
#else /* PARTS_LEX_PRE2000: */
/*
// Pre-2000 variant: the part state accumulates (ORs) the type bits of
// every part taken so far.
*/
static PartState
get_next_part_state(Transition* state, PartState part_state)
{
  PartState accumulated = state->type | part_state;

  return accumulated;
}

/*
// Pre-2000 variant: a part state is final once a single token or an
// infix has been seen.
// Returns the matching bits (non-zero) if final, 0 otherwise.
*/
static int
is_final_part_state(PartState part_state)
{
  int final_bits = part_state & (InfixBit | SingleTokenBit);

  return final_bits;
}

/*
// if allow_multiple_single_token_parts, a parts_token contains:
//                prefix* (infix|word) (infix|word)* suffix*
//        i.e.: infix and word (=SingleToken) are same kind
// else:
//                prefix* infix* (infix|word) infix* suffix*
//        i.e.: at least one infix or SingleToken, at most one SingleToken
*/

/*
// Pre-2000 variant. Returns 1 if the part carried by 'state' may be
// appended to a token whose accumulated part bits are cur_part_state,
// 0 otherwise. Any unknown part-bit combination is a programming error.
*/
static int
can_take_part_state_trans(PartState cur_part_state, Transition* state)
{
  /* which kinds of parts have already been seen in this token */
  const int seen_word = (cur_part_state & SingleTokenBit) != 0;
  const int seen_infix = (cur_part_state & InfixBit) != 0;
  const int seen_suffix = (cur_part_state & SuffixBit) != 0;

  switch (state->type & PartBitsMask)
  {
    case PrefixBit:
        /* prefixes only before any word or infix */
        return !(seen_word || seen_infix);
    case InfixBit:
        /* infixes anywhere before the first suffix */
        return !seen_suffix;
    case SingleTokenBit:
        /* a word before any suffix; a second word only if allowed */
        return !seen_suffix
               && (!seen_word || allow_multiple_single_token_parts);
    case SuffixBit:
        /* suffixes only after a word or infix */
        return seen_word || seen_infix;
    default:
        assert(0);
        break;
  }
  return 0;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static int
//        allow_parts(PartState part_state, LexemeType lex_type,
//                    SeparatorType* p_sep_type)
//
// Description:
//        Check whether part with type lex_type is allowed in state part_state.
//        //Furthermore, set location pointed to by p_sep_type to SepNoBlank
//        //if the parts-token may not be completed yet, or to SepDontCare else.
//
// Return value:
//        True, if the part is allowed, or false else.
// Note:
//        Not used anymore (kept for a rainy day?).
//        Also note that the if-conditions are the reverse of 
//        those in can_take_part_state_trans().
//------------------------------------------------------------------------------
*/

/*
// Check whether a part of type lex_type may be appended in part_state
// (pre-2000 accumulated part bits). On success *p_sep_type is set to
// SepDontCare and 1 is returned; otherwise 0 is returned and *p_sep_type
// is left untouched.
//
// Each refusal test is the logical negation of the corresponding
// acceptance test in can_take_part_state_trans().
*/
static int
allow_parts(PartState part_state, LexemeType lex_type,
            SeparatorType* p_sep_type)
{
  switch (lex_type)
  {
    case Prefix:
      /* no prefix once a word or infix has been seen */
      if (part_state & (SingleTokenBit | InfixBit))
        return 0;
      break;
    case Infix:
      /* no infix after a suffix */
      if (part_state & SuffixBit)
        return 0;
      break;
    case SingleToken:
      /* Never after a suffix; after another word only when
      // allow_multiple_single_token_parts is set.
      // (The previous test "(Single|Suffix) || (allow && Suffix)" reduced
      // to "(Single|Suffix)", so the allow flag was silently ignored.)
      */
      if ((part_state & SuffixBit)
          || (!allow_multiple_single_token_parts
              && (part_state & SingleTokenBit)))
      {
        return 0;
      }
      break;
    case Suffix:
      /* a suffix needs a preceding word or infix */
      if (!(part_state & (SingleTokenBit | InfixBit)))
        return 0;
      break;
    case MultiToken:
    default:
      /* multi tokens (and any unknown lexeme type) are never parts here;
      // the default also guarantees *p_sep_type is never written from an
      // uninitialized value
      */
      return 0;
  }
  /* every allowed part currently uses the same separator type */
  *p_sep_type = SepDontCare;
  return 1;
}
#endif /* PARTS_LEX_PRE2000 */

/*
//------------------------------------------------------------------------------
// Function:
//        static int
//        always_allow_parts(PartState part_state, LexemeType lex_type,
//                                SeparatorType* p_sep_type)
//
// Description:
//        Replaces allow_parts() check in first pass of match_parts.
//        Since this pass now (1998/08/19) allows all parts in any order,
//        this function is only used to determine the terminator type:
//        //set location pointed to by p_sep_type to SepNoBlank
//        //if the parts-token may not be completed yet, or to SepDontCare else.
//
// Return value:
//        True, the part is always allowed.
//------------------------------------------------------------------------------
*/

static int
always_allow_parts(PartState part_state, LexemeType lex_type,
                   SeparatorType* p_sep_type)
{
  /* Neither the part state nor the lexeme type influences the outcome:
  // the former special case (SepNoBlank for Prefix) is disabled, so every
  // part gets SepDontCare.
  */
  (void) part_state;
  (void) lex_type;

  *p_sep_type = SepDontCare;
  return 1;
}

/*
// The translation of a part_state to the corresponding bit in
// AggregatePartStates is done here. The rest of this file may use
// the knowledge that AggregatePartStates is a bit_set (so other routines
// can use & and | on it and initialize an aggregate to 0).
*/

/*
// Map a part_state to its single-bit representation in an
// AggregatePartStates set: bit number (part_state & PartBitsMask).
*/
static AggregatePartStates
part_state2set_bit(PartState part_state)
{
  /* Shift a 1 of the result type, not a plain int 1: with an int operand
  // the shift is evaluated in int and only then widened, which is
  // undefined once the bit number reaches the int sign bit.
  // NOTE(review): assumes AggregatePartStates is an integer type at least
  // as wide as int (the traces print it with %lx) -- confirm in the header.
  */
  return ((AggregatePartStates) 1) << (part_state & PartBitsMask);
}

/*
// Returns the part of cur_set that corresponds to the initial part
// state: either that single bit, or 0 if it is not in the set.
*/
static AggregatePartStates
initial_part_states_subset(AggregatePartStates cur_set)
{
  AggregatePartStates initial_bit
    = part_state2set_bit(get_initial_part_state());

  return cur_set & initial_bit;
}

/*
// Returns the subset of cur_set whose part states are final
// (according to is_final_part_state()).
// TODO: optimize by creating a final_set mask at program initialization
*/
static AggregatePartStates
final_part_states_subset(AggregatePartStates cur_set)
{
  AggregatePartStates finals = 0;
  PartState candidate = 0;

  while (candidate <= MaxPartState)
  {
    AggregatePartStates candidate_bit = part_state2set_bit(candidate);

    /* keep the state only if it is in the set and it is final */
    if ((cur_set & candidate_bit) && is_final_part_state(candidate))
    {
      finals |= candidate_bit;
    }
    candidate++;
  }
  return finals;
}

/*
// Returns the set bit of the part state that results from taking
// transition 'state' while in part_state.
*/
static AggregatePartStates
next_partstate_bit(Transition* state, PartState part_state)
{
  PartState successor = get_next_part_state(state, part_state);

  return part_state2set_bit(successor);
}

/*
//------------------------------------------------------------------------------
// Function:
//        static void
//        add_trans_dest_part_states(AggregatePartStates cur_aggr,
//                                        Transition* transition,
//                                        AggregatePartStates* next_aggr_p)
// Description:
//        For each of the possible part_states at the current position,
//        add the part_state resulting from this transition (if it's a
//        valid one) to the part_state_set at the destination position.
// Return value:
//        None. The resulting part_states are ORed into *next_aggr_p.
//------------------------------------------------------------------------------
*/

static void
add_trans_dest_part_states(AggregatePartStates cur_aggr, Transition* transition,
                                AggregatePartStates* next_aggr_p)
{
  PartState candidate = 0;

  while (candidate <= MaxPartState)
  {
    /* Only part states that are present at the current position and that
    // may take this transition contribute to the destination set.
    */
    if ((cur_aggr & part_state2set_bit(candidate))
        && can_take_part_state_trans(candidate, transition))
    {
      *next_aggr_p |= next_partstate_bit(transition, candidate);
    }
    candidate++;
  }
}

/*
//------------------------------------------------------------------------------
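// Function:
//        static AggregatePartStates
//        find_parts_trans_live_subset(AggregatePartStates cur_aggr,
//                        Transition* transition, AggregatePartStates next_aggr)
// Description: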
//        for all part_states in the current set, check if this transition
//        would result in a part_state in the 'next' set.
// Return value:
//        The subset of cur_aggr for which the above test succeeds.
//------------------------------------------------------------------------------
*/

static AggregatePartStates
find_parts_trans_live_subset(AggregatePartStates cur_aggr,
                        Transition* transition, AggregatePartStates next_aggr)
{
  AggregatePartStates live = 0;
  PartState candidate = 0;

  while (candidate <= MaxPartState)
  {
    AggregatePartStates candidate_bit = part_state2set_bit(candidate);

    /* A part state survives when it is present here, may take the
    // transition, and the state it leads to is in the 'next' set.
    */
    if ((cur_aggr & candidate_bit)
        && can_take_part_state_trans(candidate, transition)
        && (next_aggr & next_partstate_bit(transition, candidate)))
    {
      live |= candidate_bit;
    }
    candidate++;
  }
  return live;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static void
//        add_parts_transitions(Trellis* trellis, const unsigned char* input,
//                unsigned pos, PartState part_state, Position *last_part_end);
//
// Description:
//        For each state at position pos in state_row, try to extend a
//        parts-token by matching new parts in the next PartState.
//        At this point, we match all parts, regardless of the validness
//        of the resulting sequence (validity is checked after matching
//        a complete parts_token, the components of a valid parts_token
//        get the TokenPartBit on; dead parts are removed after lexing
//        the whole sentence).
//        If a parts-token cannot be extended, and it ends at a
//        terminator, the rest of the input is lexicalized, resulting in a
//        normal transition to the next token.
//        If a parts-token cannot be extended and there is no terminator,
//        the state is deleted. This means that no part at all could be
//        matched after the current one.
//        The order of the parts in the list is not yet important;
//        the list is not yet sorted and its tail is not yet shared.
//        The length of the part in each state should have been coded in the
//        trans-field of each state.
//        Uses last_part_end to keep track of the last position at which
//        a parts token has been found to end //F1 (usually the position of the
//        //F1 blank after all the parts).
//------------------------------------------------------------------------------
*/

/* Forward declaration: match_parts() and add_parts_transitions() call
// each other, so match_parts() must be declared before its first use.
*/
static StateNode*
match_parts(Trellis* trellis, const unsigned char* input, unsigned pos,
            PartState part_state, Position *last_part_end);

/* void delete_transition(Transition* transition); */

/* Does not really add transitions; it only looks for successor parts of
// every part already stored at position pos in the parts row.
//
// For each part at pos, match_parts() is called at the position right
// after the part. The part is kept if a successor row entry was found
// there or if the character after the part is a terminator; otherwise
// the part is deleted. When a part is kept only because of a terminator,
// *last_part_end is raised to the part's end position if that is larger.
//
// Kept parts are prepended to a fresh list, so each surviving list ends
// up in reverse order. With NONT_CLASSES, if no class keeps any part the
// StateNode at pos is freed and the row entry is set to NULL.
//
// Parameters:
//        trellis          supplies the parts row (pstates_row / parts_row)
//        input            the text being lexed
//        pos              position whose parts are examined
//        prev_part_state  part state before taking a part at pos
//        last_part_end    in/out: furthest position where a part sequence
//                         was found to end at a terminator
*/
static void
add_parts_transitions(Trellis* trellis, const unsigned char* input, unsigned pos,
                      PartState prev_part_state, Position *last_part_end)
{
#if NONT_CLASSES
  StateNode** parts_row = trellis->pstates_row;
  StateNode* pst_node = trellis->pstates_row[pos];
  int n_found_classes = 0;        /* nr of classes with live parts */
  int class;

#ifdef PARTS_TRACE
  rtsMessage("add_parts_transitions begin for pos%d lpe%d\n",
						pos, *last_part_end);
#endif
  /* the caller only gets here after parts were matched at pos */
  assert(pst_node != NULL);
  for (class = NR_classes - 1; class >= 0; class--)
  {
    Transition* transition = pst_node->trans_lists[class];
    Transition* done = NULL;        /* parts of this class that are kept */
#else /* no NONT_CLASSES */
    State** p_row = trellis->parts_row;
    State* transition = p_row[pos];
    State* done = NULL;             /* parts that are kept */
#endif /* NONT_CLASSES */
    while (transition != NULL)
    {
      /* save the successor first: the node is relinked or deleted below */
      Transition* next = transition->next;
      PartState part_state = get_next_part_state(transition, prev_part_state);
      Position next_pos;
      StateNode* trans_dest = NULL;
      /* at this stage the part still carries its length, not a transition */
      assert(!has_transition(transition));
      next_pos = pos + get_length(transition);

      /* remove !is_separator test to allow separators
      // at the start of a following part
      */
      /*F1 if (!is_separator(input[next_pos]) && !is_blank(input[next_pos])) */
      {
#ifdef PARTS_TRACE
        char * hyph_beg = transition->type & (InfixBit|SuffixBit) ? "-" : "";
        char * hyph_end = transition->type & (InfixBit|PrefixBit) ? "-" : "";
        rtsMessage("add_parts_transitions: trying '%s%s%s'->...\n",
                                        hyph_beg, transition->text, hyph_end);
#endif
        /* recursively look for parts that start where this one ends */
        trans_dest
          = match_parts(trellis, input, next_pos, part_state, last_part_end);
#ifdef PARTS_TRACE
        if (trans_dest != NULL)
        {
          rtsMessage("add_parts_transitions: added '%s%s%s'->%p\n",
                        hyph_beg, transition->text, hyph_end, trans_dest);
        }
        else
        {
          rtsMessage("add_parts_transitions: no match '%s%s%s'->\n",
                                        hyph_beg, transition->text, hyph_end);
        }
#endif
      }
#ifdef PARTS_TRACE
      rtsMessage("add_parts_transitions pos%d trans_dest%p next_pos%d\n",
						pos, trans_dest, next_pos);
#endif
      /* keep the part if it has successors or ends at a terminator */
      if ((trans_dest != NULL) || is_terminator(input[next_pos]))
      {
        if (trans_dest == NULL)
        {
          /* ends at a terminator without successors:
          // remember the furthest such end position
          */
          if (*last_part_end < next_pos)
          {
            *last_part_end = next_pos;
#ifdef PARTS_TRACE
                rtsMessage("add_parts_transitions pos%d set lpe%d=%d\n",
						pos, *last_part_end, next_pos);
#endif
          }
          /* moved lexing of next token to prune-pass */
        }
        /* prepend to the kept list (this reverses the list order) */
        transition->next = done;
        done = transition;
      }
      else
      {
        /* dead end: no following part and no terminator */
        delete_transition(transition);
      }
      transition = next;
    }
#if NONT_CLASSES
# ifdef PARTS_TRACE
    if (pst_node->trans_lists[class] != NULL)
    {
      rtsMessage("add_parts_transitions: pos %d class %d now %p\n",
                                                pos, class, done);
    }
# endif
    /* replace the class list by the kept parts only */
    pst_node->trans_lists[class] = done;
    if (done != NULL)
    {
        n_found_classes++;
    }
  } /* for class */
  if (n_found_classes == 0)
  {
        /* We have removed all parts at this pos, remove the parts-StateNode
        // too, to keep the assumption intact that a StateNode is never empty
        */
        free_statenode(parts_row[pos]);
        parts_row[pos] = NULL;
  }
#else /* no NONT_CLASSES */
  /* replace the list at pos by the kept parts only */
  p_row[pos] = done;
#endif /* NONT_CLASSES */
#ifdef PARTS_TRACE
  rtsMessage("add_parts_transitions end for pos%d lpe%d\n",
						pos, *last_part_end);
#endif
}

/*
// Find all token parts that start at position pos and, recursively, the
// parts that can follow them.
//
// If the parts row has no entry at pos yet, every kind of part (prefix,
// infix, suffix, single token, and multi token unless PARTS_LEX_PRE2000;
// regexps too if RE_ALSO_PART) is matched there. If anything matched,
// add_parts_transitions() looks for successors and may remove parts
// again; afterwards the current *last_part_end is cached in
// trellis->last_part_end_from[pos].
//
// If the row already has an entry at pos, nothing is matched again;
// *last_part_end is only raised to the cached value for pos if that is
// larger.
//
// Return value:
//        The parts row entry at pos; NULL if no part (or no surviving
//        part) starts there.
*/
static StateNode*
match_parts(Trellis* trellis, const unsigned char* input, Position pos,
            PartState part_state, Position *last_part_end)
{
#if NONT_CLASSES
  StateNode** parts_row = trellis->pstates_row;
#else
  State** parts_row = trellis->parts_row;
#endif
  SeparatorType sep_type;

  if (parts_row[pos] == NULL)
  {
    /* always_allow_parts() accepts every part kind; it is only called
    // to obtain the separator type for the match
    */
    if (always_allow_parts(part_state, Prefix, &sep_type))
      match_terminals(parts_row, input, pos, Prefix, sep_type);
    if (always_allow_parts(part_state, Infix, &sep_type))
      match_terminals(parts_row, input, pos, Infix, sep_type);
    if (always_allow_parts(part_state, Suffix, &sep_type))
      match_terminals(parts_row, input, pos, Suffix, sep_type);
    if (always_allow_parts(part_state, SingleToken, &sep_type))
      match_terminals(parts_row, input, pos, SingleToken, sep_type);
#ifndef PARTS_LEX_PRE2000
    if (always_allow_parts(part_state, MultiToken, &sep_type))
      match_terminals(parts_row, input, pos, MultiToken, sep_type);
#endif
#if RE_ALSO_PART
    match_regexp_matches(parts_row, input, pos,
				SepDontCare, 1 /* match all lextypes */);
#endif
    if (parts_row[pos] != NULL)
    {
#if (NONT_CLASSES && defined(PARTS_TRACE) && 0)
      dump_trans_lists_ptrs("match_parts<add",
                                pos, parts_row[pos]->trans_lists);
#endif
      /* look for successors of the parts just found; this may delete
      // parts and can leave parts_row[pos] NULL again
      */
      add_parts_transitions(trellis, input, pos, part_state, last_part_end);
#ifdef DEBUG
      /* diagnostic: parts survived but no end position was recorded */
      if ((parts_row[pos] != NULL) && ((int) *last_part_end <= 0))
      {
# ifdef COUNT_TRACE
        rtsMessage("%s(tok%ld): %ld,+%d'%c%c%c:last__end=%d after add_p__tr\n",
                "match_parts", n_parts_tok_returns + 1,
                        n_trel_builds, pos, input[pos],
                        input[pos+1], input[pos+2], *last_part_end);
# else
        rtsMessage("%s: %p+%d:last_part_end=%d after add_parts_transitions\n",
                        "match_parts", input, pos, *last_part_end);
# endif
      }
#endif
      /* cache the end position for later visits of this pos */
      trellis->last_part_end_from[pos] = *last_part_end;
    }
  }
  else
  {
    /* pos was handled before: reuse the cached end position */
    if (*last_part_end < trellis->last_part_end_from[pos]) {
	*last_part_end = trellis->last_part_end_from[pos];
    }
#ifdef DEBUG
    if ((int) *last_part_end <= 0)
    {
# ifdef COUNT_TRACE
        rtsMessage("%s(tok%ld): %ld,+%d'%c%c%c:last__end=%d from array\n",
                "match_parts", n_parts_tok_returns + 1,
                        n_trel_builds, pos, input[pos],
                        input[pos+1], input[pos+2], *last_part_end);
# else
        rtsMessage("%s: %p+%d:last_part_end=%d from array\n",
                        "match_parts", input, pos, *last_part_end);
# endif
    }
#endif
  }
  return parts_row[pos];
}

/*
//------------------------------------------------------------------------------
//
// Description:
//        First, forward_mark_partstate_sets() marks each part_start
//        position within the token with bits for
//        all the part_states with which that position can be reached:
//        positions are scanned from left to right; for every
//        transition from a position, the mark at its destination pos is ORed
//        with bits for every part_state that can result from that transition.
//        The mark is called a 'set' of part_states or 'aggregate'.
//
//        Second, backward_mark_partstate_sets() scans the positions from
//        right to left, checking each
//        transition node if, from any of the part_states at the current
//        position, a part_state can be reached that has been found alive
//        at the destination position. If so, the node gets the TokenPartBit,
//        to mark it alive.
//        The part_states at the current position, for which no transition
//        would result in a live destination part_state, are removed
//        from the set (i.e., the old set is overwritten by
//        its subset with live part_states only).
//        If no live parts_transitions are left at a position, and it does have
//        final_part_states in its set, a lexicalize() is tried from there.
//        If that succeeds, the final_part_states are written over the old set.
//        If not, the empty set is written.
//------------------------------------------------------------------------------
*/

/*
// Forward pass over the positions first_pos .. last_part_end - 1.
// set_array[i] holds the set of part states possible at position
// first_pos + i. For every part starting at a position, the part states
// reachable through it are ORed into the set at the part's end position.
// Nothing is removed from the transition lists here.
*/
static void
forward_mark_partstate_sets(Trellis* trellis,
                                AggregatePartStates *set_array,
                                unsigned first_pos, unsigned last_part_end)
{
  Position cur_pos;
  /* cur_set walks through set_array in step with cur_pos */
  AggregatePartStates *cur_set = set_array;

  for (cur_pos = first_pos; cur_pos < last_part_end; cur_pos++)
  {
#if NONT_CLASSES
   /* positions without parts contribute nothing */
   if (trellis->pstates_row[cur_pos] != NULL)
   {
    Transition** trans_lists = trellis->pstates_row[cur_pos]->trans_lists;
    int class;

    for (class = NR_classes - 1; class >= 0; class--)
    {
      Transition* transition = trans_lists[class];
#  ifdef PARTS_TRACE
      if (transition != NULL)
      {
        rtsMessage("%s: pos %d class %d list:%p\n",
                "forward_mark_", cur_pos, class, transition);
      }
#  endif
#else /* no NONT_CLASSES */
      State* transition = trellis->parts_row[cur_pos];
#endif /* NONT_CLASSES */
      while (transition != NULL)
      {
        Transition* next = transition->next;
        /* length of the part = distance to its destination position */
        unsigned pos_dist = get_length(transition);
        /* cur_set: set of part_states possible at cur_pos
         * cur_set[pos_dist]: set of part_states possible at pos after trans
         */
        /* The real work is to OR cur_set[pos_dist] with the part_states_set
         * with which pos_dist is reached when this transition is taken
         * (the ORing is done in add_trans_dest_part_states).
         * Removing impossible transition states from the list can't
         * be done here, because the part may be valid as a component of
         * another token.
         */
        add_trans_dest_part_states(*cur_set, transition, &(cur_set[pos_dist]));
        transition = next;
      } /* while */
#if NONT_CLASSES
    } /* for class */
   } /* if */
#endif
    cur_set++;
  } /* for pos */
}

/*
// Walk one transition list at position cur_pos and mark its live parts.
//
// *cur_set is the set of part states possible at cur_pos; cur_set[d] is
// the set of live part states d positions further on. A part is live if
// at least one part state in *cur_set can take it and lands in a live
// state at its destination. A live part
//   - is marked as token start if the initial part state is among the
//     states for which it is live;
//   - is marked as live final part if the destination set contains
//     NextTokenStartsHere, and as live nonfinal part otherwise.
//
// Return value:
//        The OR of the live part-state subsets of all parts in the list.
*/
static AggregatePartStates
find_mark_partstate_live_set(Position cur_pos, Transition* transition,
                                                AggregatePartStates* cur_set)
{
    AggregatePartStates live_union = 0;
    Transition* part;

    /* WAS crashes here due to SIGBUS: transition = FAILURE */
    for (part = transition; part != NULL; part = part->next)
    {
      unsigned dist = get_length(part);
      AggregatePartStates live_here;

#ifdef PARTS_TRACE
      char * hyph_beg = part->type & (InfixBit|SuffixBit) ? "-" : "";
      char * hyph_end = part->type & (InfixBit|PrefixBit) ? "-" : "";

      rtsMessage("%s: pos %d state %p'%s%s%s', set x%lx->x%lx, next %p\n",
                "find_mark_", cur_pos, part,
                                hyph_beg, part->text, hyph_end,
                                *cur_set, cur_set[dist], part->next);
#endif
      /* npx bug if: assert(!is_token_start(part)); */

      /* the part states at cur_pos for which this part leads to a
      // live path
      */
      live_here = find_parts_trans_live_subset(*cur_set, part, cur_set[dist]);
#ifdef PARTS_TRACE
      rtsMessage(
        "%s: pos %d state %p'%s%s%s' live x%lx, set x%lx->x%lx, next %p\n",
        "find_mark_", cur_pos, part,
                        hyph_beg, part->text, hyph_end, live_here,
                                *cur_set, cur_set[dist], part->next);
#endif
      if (live_here == 0)
      {
        /* dead part: leave it unmarked */
        continue;
      }

      if (initial_part_states_subset(live_here))
      {
        /* this part can be start of token */
        assert(!is_token_start(part)); /* after trying to fix npx bug */
        mark_token_start(part);
      }
      live_union |= live_here;

      if (cur_set[dist] & NextTokenStartsHere)
      {
#ifdef PARTS_TRACE
        rtsMessage("find_mark_: pos %d state %p'%s%s%s' final\n",
                                cur_pos, part,
                                hyph_beg, part->text, hyph_end);
#endif
        mark_live_final_part(part);
      }
      else
      {
#ifdef PARTS_TRACE
        rtsMessage("find_mark_: pos %d state %p'%s%s%s' nonfinal\n",
                                cur_pos, part,
                                hyph_beg, part->text, hyph_end);
#endif
        mark_live_nonfinal_part(part);
      }
    } /* for part */
    return live_union;
}

/*
// Backward pass over the positions last_part_end down to first_pos.
// On entry set_array[i] holds the part states with which position
// first_pos + i can be reached (result of the forward pass); on exit it
// holds only the live ones.
//
// At each position the parts are marked via
// find_mark_partstate_live_set(), which also yields the live subset of
// the position's set. If no part is live and the input has a terminator
// at the position, the final part states of the set are taken instead;
// if there are any, the rest of the input is lexicalized from this
// position and NextTokenStartsHere is added to the set.
*/
static void
backward_mark_partstate_sets(Trellis* trellis, AggregatePartStates *set_array,
        const unsigned char* input, Position first_pos, unsigned last_part_end)
{
#if NONT_CLASSES
  StateNode** parts_row = trellis->pstates_row;
#else
  State** parts_row = trellis->parts_row;
#endif
  int cur_pos; /* must be signed due to >= */
  /* start at the set belonging to last_part_end and walk downwards */
  AggregatePartStates* cur_set = set_array + last_part_end - first_pos;
#ifdef PARTS_TRACE
  rtsMessage("%s: pos %d..%d sets:",
                "backward_mark_", first_pos, last_part_end);
  for (cur_pos = first_pos; cur_pos <= last_part_end; cur_pos++)
  {
      rtsMessage(" x%lx", set_array[cur_pos - first_pos]);
  }
  rtsMessage("\n");
#endif
  for (cur_pos = last_part_end; cur_pos >= (int) first_pos; cur_pos--)
  {
    /* live_set: OR of all trans_live_sets at this position */
    AggregatePartStates live_set = 0;

    /* no FAILURES are used (yet) in parts_row,
    // but we leave the test here in case things get redesigned :-)
    */
    if (parts_row[cur_pos] != NULL && !has_failure(parts_row[cur_pos]))
    {
#if NONT_CLASSES
      int class;
      for (class = NR_classes - 1; class >= 0; class--)
      {
        Transition* transition = parts_row[cur_pos]->trans_lists[class];
#  ifdef PARTS_TRACE
        if (transition != NULL)
        {
          rtsMessage("%s: pos %d class %d list:%p\n",
                "backward_mark_", cur_pos, class, transition);
        }
#  endif
#else /* no NONT_CLASSES */
        State* transition = parts_row[cur_pos];
#endif /* NONT_CLASSES */
        /* mark the live parts of this list and collect their states */
        live_set |= find_mark_partstate_live_set(cur_pos, transition, cur_set);
#if NONT_CLASSES
      } /* for class */
#endif
    } /* if !=NULL && !has_failure */
    /* no live part continues from here: a token can only end here,
    // and only if a terminator follows
    */
    if ((live_set == 0) && is_terminator(input[cur_pos]))
    {
        live_set = final_part_states_subset(*cur_set);
#ifdef PARTS_TRACE
        rtsMessage("%s: is_sep pos %d live x%lx, set x%lx\n",
                "backward_mark_", cur_pos, live_set, *cur_set);
#endif
        if (live_set != 0)
        {
#ifdef PARTS_TRACE
          rtsMessage("backward_mark_: pos %d:lexing\n", cur_pos);
#endif
          lexicalize(trellis, input, cur_pos); /* always succeeds */
#ifdef PARTS_TRACE
          rtsMessage("backward_mark_: pos %d NextTokenStartsHere\n", cur_pos);
#endif
          /* tells the parts ending here that they are final parts */
          live_set |= NextTokenStartsHere;
        }
    }
    /* overwrite the reachable set with its live subset */
    *cur_set = live_set;
    cur_set--;
  } /* for cur_pos */
}

/*
// Mark the live parts between first_pos and last_part_end (inclusive):
// allocate one part-state set per position, seed the first one with the
// initial part state, run the forward pass and then the backward pass,
// and release the sets again.
*/
static void
mark_partstate_sets(Trellis* trellis, const unsigned char* input,
                        unsigned first_pos, unsigned last_part_end)
{
  AggregatePartStates* sets;

  /* one set for each position in first_pos .. last_part_end */
  sets = alloc_init_aggregate_array(last_part_end - first_pos + 1);
#if 0
  rtsMessage("mark_partstate_sets, pos %d..%d\n", first_pos, last_part_end);
#endif
  sets[0] = part_state2set_bit(get_initial_part_state());

  forward_mark_partstate_sets(trellis, sets, first_pos, last_part_end);
  backward_mark_partstate_sets(trellis, sets, input, first_pos, last_part_end);

  free_aggregate_array(sets);
}

/*
// forward_mark_partstate_sets(), backward_mark_partstate_sets(),
// and mark_partstate_sets() are called after each parts_token; 
// remove_nonlive_token_parts() and fix_parts_transitions() are
// called after lexing sentence.
*/

/*
//------------------------------------------------------------------------------
// 
//------------------------------------------------------------------------------
*/

/*
// Stable partition of the list at *tlist_handle: transitions for which
// is_token_start() holds are moved behind all other transitions, and the
// relative order within both groups is kept.
// Returns the first starter (the head of the tail that now ends the
// list), or NULL if the list contains no starters.
*/
static Transition*
reorder_parts_list_return_starters(Transition** tlist_handle)
{
  Transition* starters_head;
  /* each "tail" is the link field that the next member of its group
  // must be written into
  */
  Transition** starters_tail = &starters_head;
  Transition** others_tail = tlist_handle;
  Transition* node;

  /* node->next is read (loop step) before any later iteration overwrites it */
  for (node = *tlist_handle; node != NULL; node = node->next)
  {
    if (!is_token_start(node))
    {
      *others_tail = node;
      others_tail = &(node->next);
    }
    else
    {
      *starters_tail = node;
      starters_tail = &(node->next);
    }
  }

  *starters_tail = NULL;                /* terminate the starter group */
  *others_tail = starters_head;         /* starters follow the non-starters */
  return starters_head;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static State*
//        prune_parts_tokens(State** state_row,
//                                unsigned first_pos, unsigned last_part_end)
//        In a right-to-left loop, the transition pointers are set
//        correctly (replacing the length).
//        If a state is not marked 'live' (TokenPartBit), it is removed.
//        The remaining states get the parts_transition flag, unless the
//        TokenStart flag is set on the destination state (in which case they
//        get the regular (token_)trans flag). TokenStart is set on all
//        token beginnings (including the first part of a parts_token)
//        by add_transitions() which is called from lexicalize().
//TODO: can we end up with unreachable nodes after remove_ ?!?
//        
// Return value:
//        NULL if no parts tokens were left after pruning;
//        a State* otherwise.
//------------------------------------------------------------------------------
*/
/*
// The starter parts (all alive) are already at end of list,
// and should be kept in the same order (because they are already
// linked from the end of the state_row list).
// The algorithm below keeps a destination-pointer, to where the
// next live state-ptr should be put.
*/

/*
// Filter the list at *tlist_handle in place: transitions that satisfy
// is_live_token_part() stay (in their original order), all others are
// handed to delete_transition().
*/
static void
remove_nonlive_token_parts(Transition** tlist_handle)
{
  Transition** keep_link = tlist_handle;  /* where the next survivor goes */
  Transition* cur = *tlist_handle;
  Transition* following;

  for (; cur != NULL; cur = following)
  {
    /* remember the successor first: cur may be deleted below */
    following = cur->next;

    if (!is_live_token_part(cur))
    {
      /* if all transitions in all classes are removed,
      // caller (fix_parts_admin) will delete state_node as well */
      delete_transition(cur);
      continue;
    }
    *keep_link = cur;
    keep_link = &(cur->next);
  }
  *keep_link = NULL;        /* terminate the filtered list */
}

#ifdef SHOW_ZERO_DEST
/*
// Diagnostic filter (only built with SHOW_ZERO_DEST): removes, in place,
// every transition of the list at *tlist_handle that
//   - is a final part but has no entry in state_row at its destination, or
//   - is a non-final part but has no entry in parts_row at its destination, or
//   - is neither a final nor a non-final part.
// The destination is at_pos + get_length(transition). Each removed
// transition is reported through rtsMessage() and then deleted; the
// remaining transitions keep their order.
*/
static void
remove_erroneous_token_parts(Transition** tlist_handle,
		StateNode** state_row, StateNode** parts_row, Position at_pos)
{
    Transition** keep_link = tlist_handle;
    Transition* cur = *tlist_handle;

    while (cur != NULL) {
	Transition* following = cur->next;	/* cur may be deleted below */
	Position dest_pos = at_pos + get_length(cur);
	int erroneous =
	       (is_final_part(cur) && !state_row[dest_pos])
	    || (is_nonfinal_part(cur) && !parts_row[dest_pos])
	    || (!is_final_part(cur) && !is_nonfinal_part(cur));

	if (!erroneous) {
	    *keep_link = cur;
	    keep_link = &(cur->next);
	} else {
	    rtsMessage("rm_err: 0x%p: '%s' term=0x%x type=0x%x\n",
			cur, cur->text, cur->terminal, cur->type);
	    rtsMessage("      pos=%d dst=%d sr[dst]=0x%p pr[dst]=0x%p\n",
		at_pos, dest_pos, state_row[dest_pos], parts_row[dest_pos]);

	    /* if all transitions in all classes are removed,
	    // caller (fix_parts_admin) will delete state_node as well */
	    delete_transition(cur);
	}
	cur = following;
    }
    *keep_link = NULL;        /* terminate the filtered list */
}
#endif /* SHOW_ZERO_DEST */

/*
// Walk the transition list starting at 'state' (all located at position
// at_pos) and attach each transition to its destination, which lies at
// at_pos + get_length(transition):
//   - a final part is attached with add_transition() to the state_row
//     entry at the destination (after may_skip_failures() had a chance
//     to adjust that position);
//   - a non-final part is attached with add_parts_transition() to the
//     parts_row entry at the destination;
//   - a transition that is both final and non-final is duplicated first,
//     so that one copy receives the parts transition and the other the
//     regular token transition.
*/
static void
fix_parts_transitions(StateNode** state_row, StateNode** parts_row,
                                        Transition* state, Position at_pos)
{
  while (state != NULL)
  {
#ifdef PARTS_TRACE
        /* hyphens shown in the trace output before/after the part text */
        char * hyph_beg = state->type & (InfixBit|SuffixBit) ? "-" : "";
        char * hyph_end = state->type & (InfixBit|PrefixBit) ? "-" : "";
#endif
        Position trans_pos = at_pos + get_length(state);
        if (is_final_part(state))
        {
          if (is_nonfinal_part(state))
          {
            /* We seem to have both a parts_trans and a token_trans here.
            // Duplicate the state.
            */
            Transition *orig_next = state->next;
            state->next = alloc_transition();
            *(state->next) = *state;        /* copy whole struct */
            /* The copy must not own the text: delete_transition() frees
            // the text only when TxtFreeBit is set, so the bit is cleared
            // in the copy.
            // NOTE(review): the struct copy also duplicates the 'params'
            // pointer, and delete_transition() frees a non-NULL params --
            // confirm that both copies cannot be deleted with shared params.
            */
            state->next->type = state->type & ~TxtFreeBit;
            state->next->next = orig_next;
#ifdef SHOW_ZERO_DEST
	    if (!parts_row[trans_pos]) {
	      fprintf(stderr, "fix_parts_transitions: add_d_tr(0x%p, 0, %d)\n",
							      state, trans_pos);
	    }
#endif
            /* the original gets the parts transition ... */
            add_parts_transition(state, parts_row[trans_pos], trans_pos);
            /* ... and the freshly inserted copy continues as the final part */
            state = state->next;
          }
          /* trans_pos is passed by address, so it may be changed here */
          may_skip_failures(state_row, &trans_pos);
#ifdef PARTS_TRACE
          rtsMessage("%s: pos %d state %p'%s%s%s' final=> %d:%p\n",
                "fix_parts_transitions", at_pos,
                        state, hyph_beg, state->text, hyph_end,
                                        trans_pos, state_row[trans_pos]);
#endif
#if NONT_CLASSES
#else
          /* only checked when transitions are not split into classes */
          assert(is_token_start(state_row[trans_pos]));
#endif
#ifdef SHOW_ZERO_DEST
	  if (!state_row[trans_pos]) {
	    fprintf(stderr, "fix_parts_transitions: add_tr(0x%p, 0, %d)\n",
							    state, trans_pos);
	  }
#endif
          add_transition(state, state_row[trans_pos], trans_pos);
        }
        else
        {
          /* not a final part: the destination is looked up in parts_row */
#ifdef PARTS_TRACE
          rtsMessage("%s: pos %d state %p'%s%s%s' notfinal-> %d:%p\n",
                "fix_parts_transitions", at_pos,
                        state, hyph_beg, state->text, hyph_end,
                                        trans_pos, parts_row[trans_pos]);
#endif
#ifdef SHOW_ZERO_DEST
	  if (!parts_row[trans_pos]) {
	    fprintf(stderr, "fix_parts_transitions: add_p_tr(0x%p, 0, %d)\n",
							    state, trans_pos);
	  }
#endif
          add_parts_transition(state, parts_row[trans_pos], trans_pos);
        }
        state = state->next;
  }
}

/*
// Post-process the parts rows of the whole trellis, from the last position
// down to position 0. For every position that has a parts entry, the
// non-live parts are removed from each parts transition list and the
// remaining transitions are attached to their destinations by
// fix_parts_transitions().
// With NONT_CLASSES every class has its own list; when all lists of a
// position turn out empty, the parts StateNode itself is freed and its
// parts_row slot is reset to NULL.
*/
static void
fix_parts_admin(Trellis* trellis)
{
#if NONT_CLASSES
  StateNode** parts_row = trellis->pstates_row;
  StateNode** state_row = trellis->states_row;
#else
  State** parts_row = trellis->parts_row;
  State** state_row = trellis->state_row;
#endif
  int cur_pos;        /* must be signed due to >=0 test below */

  /* order does matter here; removing dead parts may change the
   * first state (pointed to from parts_row) */
  for (cur_pos = trellis->length - 1; cur_pos >= 0; cur_pos--)
  {
    if (parts_row[cur_pos] != NULL)
    {
#if NONT_CLASSES
      int n_found_classes = 0;        /* nr of classes with live parts */
      Transition** ptrans_lists = trellis->pstates_row[cur_pos]->trans_lists;
      int class;
      /* the statements up to "for class" below form the body of this loop */
      for (class = NR_classes - 1; class >= 0; class--)
      {
        Transition** plist_handle = &(ptrans_lists[class]);
#else
        /* without classes the parts_row slot itself is the list head */
        Transition** plist_handle = &(parts_row[cur_pos]);
#endif
        remove_nonlive_token_parts(plist_handle);
#ifdef SHOW_ZERO_DEST
/* the identifier below is a C statement label that serves as a TODO marker */
TODO_make_unnecessary_the_following_KLUDGE:
	remove_erroneous_token_parts(plist_handle,
					state_row, parts_row, cur_pos);
#endif
        /* *plist_handle is re-read: the removals above may have changed
        // the head of the list */
        fix_parts_transitions(state_row, parts_row, *plist_handle, cur_pos);
#if NONT_CLASSES
        if (*plist_handle != NULL)
        {
          n_found_classes++;
        }
      } /* for class */
#  ifdef PARTS_TRACE
      rtsMessage("fix_parts_admin: pos %d n_found_classes %d\n",
                                        cur_pos, n_found_classes);
#  endif
      if (n_found_classes == 0)
      {
        /* We have removed all parts at this pos, remove the parts-StateNode
        // too, to keep the assumption intact that a StateNode is never empty
        */
        free_statenode(parts_row[cur_pos]);
        parts_row[cur_pos] = NULL;
      }
#endif
    } /* if != NULL */
  } /* for cur_pos */
}

/*
//
// The parts that can be start of token are moved to the end of
// their parts_transition_lists. The tails of the lists
// (containing these parts) are appended (not copied)
// to the corresponding tail of the token_transition_lists.
// The effect is that a starter_part appears
// both in the token_transition_list (in case a transition path
// leads to a regular token transition at this pos)
// and in the parts_transition_list (in case another transition path
// leads to a parts transition at this pos).
//
// Caller assures that pstates_row[pos] != NULL
*/

/*
// For position pos: reorder each parts transition list so that the
// token-start parts form its tail, and append that tail to the matching
// token transition list, so that both lists share it.
// With NONT_CLASSES this is done per class, and the token StateNode at pos
// is allocated on demand when starters are found and none exists yet.
// Returns the token-side entry for pos after the operation; the caller
// treats a NULL result as "no token starts here".
*/
static StateNode*
share_starter_parts_transitions(Trellis *trellis, Position pos)
{
#if NONT_CLASSES
  StateNode** state_handle = &(trellis->states_row[pos]);
  Transition** ptrans_lists = trellis->pstates_row[pos]->trans_lists;
  int class;

  for (class = NR_classes - 1; class >= 0; class--)
  {
    Transition** plist_handle = &(ptrans_lists[class]);
    Transition* tlist_null = NULL;
    Transition** tlist_handle = &tlist_null;        /* so PARTS_TRACE prints 0 */

    /* classes without parts transitions are skipped entirely */
    if (*plist_handle != NULL)
    {
#else
      /* without classes the row slots themselves are the list heads */
      Transition** plist_handle = &(trellis->parts_row[pos]);
      Transition** tlist_handle = &(trellis->state_row[pos]);
      StateNode** state_handle = tlist_handle;
#endif
      /*
      // the tail of the list, containing the starter_parts, is
      // shared among the parts_ list and the state_ list
      */
      Transition* starters = reorder_parts_list_return_starters(plist_handle);
      if (starters != NULL)
      {
#if NONT_CLASSES
        if (*state_handle == NULL)
        {
          /* no regular transitions have been found at this pos yet,
          // so we need to create a transition vector here
          */
          *state_handle = alloc_statenode();
          init_statenode(*state_handle, pos);
        }
        /* from here on tlist_handle refers to the real token list */
        tlist_handle = &((*state_handle)->trans_lists[class]);
#endif
        append_to_transition_list(tlist_handle, starters);
      }
#ifdef PARTS_TRACE
#  if NONT_CLASSES
      /* trace only: show the existing token list even without starters */
      else if (*state_handle != NULL)
      {
        tlist_handle = &((*state_handle)->trans_lists[class]);
      }
      rtsMessage("%s: pos %d class %d tokens %p parts %p starters %p\n",
                "share_starter_", pos,
                                class, *tlist_handle, *plist_handle, starters);
#  else
      rtsMessage("%s: pos %d tokens %p parts %p starters %p\n",
                "share_starter_", pos, *tlist_handle, *plist_handle, starters);
#  endif
#endif
#if NONT_CLASSES
    } /* if plist_handle */
  } /* for class */
#endif
  return *state_handle;
}

/*
//------------------------------------------------------------------------------
// Function:
//      static int
//        match_parts_tokens(Trellis* trellis,
//                           const unsigned char* input, unsigned pos)
//
// Description:
//        Match parts-tokens at position pos in input, and insert corresponding
//        states in state_row. A parts-token has the following syntax:
//                prefix* (infix|word)+ suffix*
//        with at least one prefix, infix or suffix, and at most one word,
//        and not beginning or ending with an infix.
//
// Return value:
//        True, if at least one valid parts-token could be matched, or false else.
//
// Note:
//        Because we link all states at the same position with the next-pointer,
//        invalid part-tokens may still be possible (over-generation)!
//------------------------------------------------------------------------------
*/

static int
match_parts_tokens(Trellis* trellis, const unsigned char* input, Position pos)
{
  Position last_part_end = 0;
  /* matching always starts from the initial part state */
  StateNode* found = match_parts(trellis, input, pos,
                                 get_initial_part_state(), &last_part_end);
#ifdef PARTS_TRACE
  rtsMessage("%s: pos %d state %p\n",
                "match_parts_tokens", pos, found);
#endif
  if (found != NULL)
  {
#if (NONT_CLASSES && defined(PARTS_TRACE) && 0)
    dump_trans_lists_ptrs("match_parts_tokens", pos, found->trans_lists);
#endif
    assert(last_part_end > 0);

    /* Mark pass: flag the parts that occur in a valid sequence. Dead
    // parts are not removed here but only after the whole sentence has
    // been lexed. At the end of marking, lexicalize() is run from all
    // possible endings of this token.
    */
    mark_partstate_sets(trellis, input, pos, last_part_end);

    /* Parts that can also start a token must show up in the parts list
    // as well as in the token list. The result is NULL when none of the
    // parts is a starter and no other (non-parts) token was found at
    // this position before.
    */
    found = share_starter_parts_transitions(trellis, pos);
  }
#ifdef COUNT_TRACE
  n_parts_tok_returns++;
#endif
  return (found == NULL) ? 0 : 1;
}

/*
// Placeholder for list matching: always reports "no match" (0).
// Only when USING_MATCH_LIST is enabled does it do any work, namely
// initialising a list limit right after an opener delimiter at pos.
*/
static int
match_list(StateNode** state_row, const unsigned char* input, unsigned pos)
{
  const int n_matches = 0;        /* nothing is ever inserted in state_row */

#if USING_MATCH_LIST
  if (is_delimiter(input[pos], Opener))
  {
    TokenLimit limit;
    init_list_limit(&limit, input + pos + 1);
  }
#endif
  return n_matches;
}

/*
//------------------------------------------------------------------------------
// Function:
//        static int
//        match_eos(State** state_row, const unsigned char* input, unsigned pos)
//
// Description:
//        Try to match end-of-sentence at position pos in input. If success,
//        insert eos-state at position pos in state_row.
//
// Return value:
//        True, if eos could be matched, or false else.
//------------------------------------------------------------------------------
*/

static int
match_eos(StateNode** state_row, const unsigned char* input, unsigned pos)
{
  Transition* eos_trans;

  if (!is_eos(input[pos]))
  {
    return 0;        /* no end-of-sentence at this position */
  }

  /* create the eos transition and file it under the grammar-terminal class */
  eos_trans = alloc_transition();
  init_eos_transition(eos_trans, pos);
  insert_transition(state_row, pos, gr_term_class, eos_trans, 0);
  return 1;
}

/*
//------------------------------------------------------------------------------
// Function
//        static Position
//        lexicalize(Trellis* trellis, const unsigned char* input, unsigned pos)
//
// Description:
//        Perform lexical analysis of input, starting at position pos.
//        At this position, we either have failure, and skip to the next
//        position, or we have memoized a previous result, which is then
//        returned, or we have no result yet, in which case we try to
//        match the next token and the rest of the input following it.
//
//        Match all possible terminals with next token in priority ordering.
//        Only terminals of the same priority are allowed at the same
//        //F1 position. First skip any blanks. If we then have an invisible
//        position. If we have an invisible	//F1
//        character, match lexemes that may start with an invisible
//        character. If nothing matches, skip the invisible character,
//        //F1 and any blanks,
//	  and try again. If there are no more invisible
//        characters ahead, and we have not matched anything, try to
//        match all possible terminals, including end-of-sentence.
//        If we still don't have a match, skip the unknown token.
//
// Return value:
//        Position of the first valid token; state_row holds a non-NULL
//        entry at that position.
//
// To do:
//        Prefer longest matches for single tokens. This will give a
//        preference to abbreviations over single letters with periods.
//------------------------------------------------------------------------------
*/

/*
// Try the first-priority token matchers at position pos and report
// whether any of them matched (nonzero) or not (zero).
// Which matchers take part is decided at compile time:
//   - MATCH_TOKENS_WITH_PREFERENCE: multi tokens, single tokens, regexp
//     matches and parts tokens are tried in that order, and '||' stops at
//     the first matcher that succeeds;
//   - otherwise the results are combined with bitwise '|', so every
//     matcher that is compiled in is called.
*/
static int
match_prio1_tokens(Trellis* trellis, const unsigned char* input, unsigned pos)
{
/* state_row is only declared for the configurations that actually use it */
#if MATCH_TOKENS_WITH_PREFERENCE || !RE_ALSO_PART || defined(PARTS_LEX_PRE2000)
# if NONT_CLASSES
  StateNode** state_row = trellis->states_row;
# else
  State** state_row = trellis->state_row;
# endif
#endif

    /* The match_* functions called below return their number of matches,
    ** whereas the return value of the current function is booleanish:
    ** if 1 or more matches are found, any nonzero value may result.
    */

#if MATCH_TOKENS_WITH_PREFERENCE
  /* this is with preference, e.g. no parts if we found single_tokens */
  return match_multi_tokens(state_row, input, pos)
           || match_single_tokens(state_row, input, pos)
           || match_regexp_matches(state_row, input, pos,
				SepRequired, 0 /* only SingleToken REs */)
           || match_parts_tokens(trellis, input, pos);
#else
  /* this is without preference, all possible token types are matched */
  /* the single return statement below is assembled from the fragments
  ** selected by PARTS_LEX_PRE2000 and RE_ALSO_PART
  */
#  ifdef PARTS_LEX_PRE2000
    return match_multi_tokens(state_row, input, pos) |
#  else	/* MultiTokens are scanned as parts_tokens */
    return
#  endif
#  if !RE_ALSO_PART
	    match_regexp_matches(state_row, input, pos,
				SepRequired, 0 /* only SingleToken REs */) |
#  endif
	    match_parts_tokens(trellis, input, pos);
#endif
}

/*
// Lexicalize the input starting at pos and return the position at which
// the first token was found.
// If a position that is reached already has an entry in state_row, that
// position is returned right away without matching again. Otherwise the
// matchers are tried; as long as nothing matches, pos is moved on (past
// invisible characters or past an unknown token) and matching is retried.
// After a fresh match, add_transitions() is called for the matched
// position before it is returned.
*/
static Position
lexicalize(Trellis* trellis, const unsigned char* input, unsigned pos)
{
  int match = 0;
#if NONT_CLASSES
  StateNode** state_row = trellis->states_row;
#else
  State** state_row = trellis->state_row;
#endif

#ifdef LEXI_TRACE
  rtsMessage("lexicalize at pos %d\n", pos);
#endif
  while (!match)
  {
    /*F1 skip_blanks_or_failures(state_row, input, &pos); */
    /* pos is passed by address: it may be moved forward here */
    may_skip_failures(state_row, &pos);
    /* at an invisible character only the prio1 matchers are tried */
    while (!match && is_invisible(input[pos]))
    {
      if (state_row[pos] != NULL)
      {
        /* this position already has a result: reuse it */
        return pos;
      }
      match = match_prio1_tokens(trellis, input, pos);
      if (!match)
      {
        do_skip_invisible_char(state_row, input, &pos);
        /*F1 skip_blanks_or_failures(state_row, input, &pos); */
        may_skip_failures(state_row, &pos);
      }
    }
    if (!match)
    {
      if (state_row[pos] != NULL)
      {
        /* this position already has a result: reuse it */
        return pos;
      }
      /* '||' short-circuits: a later matcher is only tried when all
      // earlier ones found nothing */
      match = match_list(state_row, input, pos)
           || match_prio1_tokens(trellis, input, pos)
           || match_regexp_skips(state_row, input, pos,
				SepRequired, 0 /* only SingleToken REs */)
           || match_eos(state_row, input, pos);
    }
    /* still nothing: move pos past the unknown token and try again */
    if (!match)
      skip_unknown_token(state_row, input, &pos);
  }
  add_transitions(trellis, input, pos);
  /* a fresh match must have left a non-failure entry at pos */
  assert(state_row[pos] != NULL);
  assert(!has_failure(state_row[pos]));
  return pos;
}

/*
//------------------------------------------------------------------------------
// Function
//        static void
//        remove_failures(Trellis* trellis)
//
// Description:
//        Replace failure positions in state-row with empty state lists.
//------------------------------------------------------------------------------
*/

static void
remove_failures(Trellis* trellis)
{
#if NONT_CLASSES
  StateNode** slot = trellis->states_row;
#else
  State** slot = trellis->state_row;
#endif
  unsigned remaining;

  /* visit every position once; failure entries are simply overwritten */
  for (remaining = trellis->length; remaining > 0; remaining--)
  {
    if (has_failure(*slot))
    {
      *slot = NULL;
    }
    slot++;
  }
}

/*
// Build the trellis for one input: lexicalize from position 0, record the
// position that was found as the first position of the trellis, fix up
// the parts administration and finally clear the failure entries.
*/
static void
build_trellis(Trellis* trellis, const unsigned char* input)
{
  Position start_pos;

  start_pos = lexicalize(trellis, input, 0);
  SET_FIRST_POS(trellis,start_pos);

  /* keep this order: fix_parts_admin() relies on failures still present */
  fix_parts_admin(trellis);
  remove_failures(trellis);
}

/*
// Reset the first len positions of the trellis: the state row and parts
// row entries become NULL and the last_part_end_from entries become 0.
// The trellis length is set to len.
*/
void
init_trellis(Trellis* trellis, unsigned len)
{
#if NONT_CLASSES
  StateNode** state_row = trellis->states_row;
  StateNode** parts_row = trellis->pstates_row;
#else
  State** state_row = trellis->state_row;
  State** parts_row = trellis->parts_row;
#endif
  unsigned *last_end = trellis->last_part_end_from;
  unsigned pos;

  for (pos = 0; pos < len; pos++)
  {
    state_row[pos] = NULL;
    parts_row[pos] = NULL;
    last_end[pos] = 0;
  }
  trellis->length = len;
}

/*
 * Open the negative memos that the directors allow for one transition.
 *
 * The director bit-vector for the transition is selected by its kind
 * (lexicon, terminal, match regexp, skip regexp or end-of-sentence) and
 * indexed by the number decoded from state->terminal. For every memo
 * index i below get_nr_neg_memos() whose bit is set in that vector,
 * neg_memos[i] is set to MEMO_UNKNOWN; all other entries are left as
 * they are.
 *
 * Compiles to an empty function when STANDALONE_LEXER is defined.
 */
static void
open_neg_memos_for_transition(Transition* state, NegMemo* neg_memos)
{
#ifndef STANDALONE_LEXER
    unsigned long* neg_memo_directors;
    unsigned long word, bit;
    unsigned i, neg_memo_size;

        /* determine memo directors bit-vector for this state */
    if (is_lexicon_transition(state)) {
        neg_memo_directors = lex_memo_dir[DECODE_NONT_NUMBER(state->terminal)];
    } else if (is_terminal_transition(state)) {
        neg_memo_directors = term_memo_dir[DECODE_TERM_NUMBER(state->terminal)];
    } else if (is_match_regexp_transition(state)) {
        neg_memo_directors = match_memo_dir[DECODE_REGEXP_NUMBER(state->terminal)];
    } else if (is_skip_regexp_transition(state)) {
        neg_memo_directors = skip_memo_dir[DECODE_REGEXP_NUMBER(state->terminal)];
    } else if (is_eos_transition(state)) {
        neg_memo_directors = term_memo_dir[DECODE_TERM_NUMBER(state->terminal)];
    } else {
        assert(0);
        /* With NDEBUG the assert vanishes; do not go on with an
         * uninitialised director pointer. */
        return;
    }

        /* open neg_memos in bit-vector */
    neg_memo_size = get_nr_neg_memos();
    word = *neg_memo_directors++;
    bit = 0;
    for (i = 0; i < neg_memo_size; i++) {
        if (bit == BITS_PER_WORD) {
            /* current word is used up: continue with the next one */
            word = *neg_memo_directors++;
            bit = 0;
        }

        /* The mask must have the type of 'word': shifting the int
         * constant 0x01 is undefined as soon as bit reaches the width
         * of int minus one, which made the upper bits of the word
         * untestable. */
        if (word & (1UL << bit)) {
            neg_memos[i] = MEMO_UNKNOWN;
        }
        bit++;
    }
#endif /* STANDALONE_LEXER */
}

#if NONT_CLASSES
/*
// Attach the neg_memos table to a StateNode (nothing happens for NULL).
// When directors are in use, the memos are first opened for every
// transition in every class list of the node.
*/
static void
add_state_neg_memos(StateNode* state, NegMemo* neg_memos)
{
  if (state == NULL)
  {
    return;
  }

  if (directors_option)
  {
    int cls;
    for (cls = NR_classes - 1; cls >= 0; cls--)
    {
      Transition* cur;
      for (cur = state->trans_lists[cls]; cur != NULL; cur = cur->next)
      {
        open_neg_memos_for_transition(cur, neg_memos);
      }
    }
  }
  state->neg_memos = neg_memos;
}
#else
/*
// Attach the neg_memos table to every state of the list that starts at
// 'state'. When directors are in use, the memos are opened for each
// state before the table pointer is stored in it.
*/
static void
add_state_neg_memos(State* state, NegMemo* neg_memos)
{
  State* cur;

  for (cur = state; cur != NULL; cur = cur->next)
  {
    if (directors_option)
    {
      open_neg_memos_for_transition(cur, neg_memos);
    }
    cur->neg_memos = neg_memos;
  }
}
#endif

#ifdef PMRTS
/* Attach the pos_memos table to a StateNode; a NULL node is ignored. */
static void
add_state_pos_memos(StateNode* state, PosMemo* pos_memos)
{
  if (state == NULL)
  {
    return;
  }
  state->pos_memos = pos_memos;
}
#endif /* PMRTS */

/*
// Fill a complete neg_memos table (get_nr_neg_memos() entries) with its
// start value: MEMO_BLOCKED when directors are in use, MEMO_UNKNOWN
// otherwise.
*/
static void
initialize_neg_memos(NegMemo* neg_memos)
{
  const unsigned n_memos = get_nr_neg_memos();
  unsigned idx;

  if (directors_option)
  {
    for (idx = 0; idx < n_memos; idx++)
    {
      neg_memos[idx] = MEMO_BLOCKED;
    }
    return;
  }

  for (idx = 0; idx < n_memos; idx++)
  {
    neg_memos[idx] = MEMO_UNKNOWN;
  }
}

#ifdef PMRTS
/*
 * Initialise every entry of a pos_memos table; the table is taken to
 * have one entry per syntax nonterminal.
 */
static void
initialize_pos_memos(PosMemo* memos)
{
    int idx = 0;

    while (idx < get_nr_syntax_nonterminals()) {
        posmemo_init_table_entry(memos + idx);
        ++idx;
    }
}
#endif /* PMRTS */

/*
// We must take into account below, that some states (the first parts of
// a parts_token) occur both in the parts_row and the state_row.
// For memos, no harm is done by writing the same pointer twice.
*/

void
add_trellis_neg_memos(Trellis* trellis)
{
  if (neg_memo_option || directors_option)
  {
#if NONT_CLASSES
    StateNode** state_row = trellis->states_row;
    StateNode** parts_row = trellis->pstates_row;
#else
    State** state_row = trellis->state_row;
    State** parts_row = trellis->parts_row;
#endif
    const unsigned len = trellis->length;
    unsigned pos;

    for (pos = 0; pos < len; pos++)
    {
      StateNode* state = state_row[pos];
      StateNode* pstate = parts_row[pos];
      NegMemo* neg_memos;

      if ((state == NULL) && (pstate == NULL))
      {
        continue;        /* nothing at this position needs a table */
      }

      /* one table per position, shared by all states at this pos */
      neg_memos = alloc_neg_memos();
      initialize_neg_memos(neg_memos);
      add_state_neg_memos(state, neg_memos);
      add_state_neg_memos(pstate, neg_memos);
    }
  }
}

#ifdef PMRTS
/*
// When pos_memo_option is set, give every trellis position that has a
// state or a parts entry its own pos_memos table. The table is attached
// to both entries and then initialised.
*/
void
add_trellis_pos_memos(Trellis* trellis)
{
  if (pos_memo_option)
  {
    StateNode** state_row = trellis->states_row;
    StateNode** parts_row = trellis->pstates_row;
    const unsigned len = trellis->length;
    unsigned pos;

    for (pos = 0; pos < len; pos++)
    {
      StateNode* state = state_row[pos];
      StateNode* pstate = parts_row[pos];
      PosMemo* pos_memos;

      if ((state == NULL) && (pstate == NULL))
      {
        continue;        /* nothing at this position needs a table */
      }

      /* one table per position, shared by all states at this pos */
      pos_memos = alloc_pos_memos();
      add_state_pos_memos(state, pos_memos);
      add_state_pos_memos(pstate, pos_memos);
      initialize_pos_memos(pos_memos);
    }
  }
}
#endif /* PMRTS */

#ifdef COUNTERS
/*
// Return the neg_memos table in use at position pos: taken from the
// state row entry if there is one, else from the parts row entry.
// NULL when neither row has an entry at pos.
*/
static const NegMemo*
get_neg_memo_table(const Trellis* trellis, unsigned pos)
{
  StateNode** state_row = GET_TRELLIS_STATE_ROW(trellis);
  StateNode** parts_row = GET_TRELLIS_PARTS_ROW(trellis);
  StateNode* node = state_row[pos];

  if (node == NULL)
  {
    node = parts_row[pos];
  }
  if (node == NULL)
  {
    return NULL;
  }
  return node->neg_memos;
}

#ifdef PMRTS
/*
// Return the pos_memos table in use at position pos: taken from the
// state row entry if there is one, else from the parts row entry.
// NULL when neither row has an entry at pos.
*/
static const PosMemo*
get_pos_memo_table(const Trellis* trellis, unsigned pos)
{
  StateNode** state_row = GET_TRELLIS_STATE_ROW(trellis);
  StateNode** parts_row = GET_TRELLIS_PARTS_ROW(trellis);
  StateNode* node = state_row[pos];

  if (node == NULL)
  {
    node = parts_row[pos];
  }
  if (node == NULL)
  {
    return NULL;
  }
  return node->pos_memos;
}
#endif /* PMRTS */

/*
// Print how many negative memos of the trellis are still MEMO_BLOCKED,
// counted over all positions that have a neg_memos table, together with
// the total number of memos and the blocked percentage.
// Nothing is printed when the grammar has no negative memos at all.
*/
void show_neg_memo_blocks(const Trellis* trellis)
{
  unsigned neg_memo_size = get_nr_neg_memos();
  if (neg_memo_size > 0)
  {
    unsigned nr_neg_memos = 0;
    unsigned nr_blocked = 0;
    double percentage = 0.0;
    unsigned len = trellis->length;
    unsigned i;
    for (i = 0; i < len; i++)
    {
      const NegMemo* neg_memos = get_neg_memo_table(trellis, i);
      if (neg_memos != NULL)
      {
        unsigned j;
        for (j = 0; j < neg_memo_size; j++)
        {
          if (neg_memos[j] == MEMO_BLOCKED)
            nr_blocked++;
        }
        nr_neg_memos += neg_memo_size;
      }
    }
    /* no position may carry a table at all: avoid dividing by zero */
    if (nr_neg_memos > 0)
    {
      percentage = nr_blocked * 100.0 / nr_neg_memos;
    }
    /* the counters are unsigned, so they are printed with %u */
    lex_print_formatted("Blocked %u out of %u neg_memos (%.0f%%)\n\n",
                     nr_blocked, nr_neg_memos, percentage);
  }
}

#ifdef PMRTS
/*
// Print how many positive memos of the trellis are blocked (according to
// posmemo_is_blocked()), counted over all positions that have a pos_memos
// table, together with the total number of memos and the blocked
// percentage.
// Nothing is printed when get_nr_pos_memos() reports zero.
*/
void show_pos_memo_blocks(const Trellis* trellis)
{
  unsigned pos_memo_size = get_nr_pos_memos();
  if (pos_memo_size > 0)
  {
    unsigned nr_pos_memos = 0;
    unsigned nr_blocked = 0;
    double percentage = 0.0;
    unsigned len = trellis->length;
    unsigned i;
    for (i = 0; i < len; i++)
    {
      const PosMemo* pos_memos = get_pos_memo_table(trellis, i);
      if (pos_memos != NULL)
      {
        unsigned j;
        for (j = 0; j < pos_memo_size; j++)
        {
          if (posmemo_is_blocked(&pos_memos[j]))
            nr_blocked++;
        }
        nr_pos_memos += pos_memo_size;
      }
    }
    /* no position may carry a table at all: avoid dividing by zero */
    if (nr_pos_memos > 0)
    {
      percentage = nr_blocked * 100.0 / nr_pos_memos;
    }
    /* the counters are unsigned, so they are printed with %u */
    lex_print_formatted("Blocked %u out of %u pos_memos (%.0f%%)\n\n",
                      nr_blocked, nr_pos_memos, percentage);
  }
}
#endif /* PMRTS */
#endif

/*
//------------------------------------------------------------------------------
// Destruction of trellis
//------------------------------------------------------------------------------
*/

/*
// Release one transition: its text (only when the TxtFreeBit marks the
// text as owned by this transition), its params (when present), and
// finally the transition itself.
*/
void
delete_transition(Transition* transition)
{
  const int owns_text = (transition->type & (TxtFreeBit)) != 0;

  if (owns_text)
  {
    FreeMem((char*)transition->text, "delete_transition");
  }

  if (transition->params != NULL)
  {
    FreeMem((PARAM*)transition->params, "delete_transition");
  }

  free_transition(transition);
}

/*
// In delete_state_list, we use the knowledge that the shared part
// of the state_list is the tail (and consists fully of token-parts).
*/

/*
// Delete the transitions of a list from its head onwards.
// With 'always' set the whole list is deleted; otherwise deletion stops
// at the first live token part, leaving that transition and everything
// behind it untouched.
*/
static void
delete_transition_list(Transition* transition, int always)
{
  Transition* cur = transition;

  for (;;)
  {
    Transition* following;

    if (cur == NULL)
    {
      break;        /* end of list */
    }
    if (!always && is_live_token_part(cur))
    {
      break;        /* start of the part that must be kept */
    }
    following = cur->next;
    delete_transition(cur);
    cur = following;
  }
}

#if NONT_CLASSES
/*
// Delete a StateNode: every non-empty class list is passed to
// delete_transition_list() (with the same 'always' flag), after which
// the node itself is freed. A NULL node is ignored.
*/
static void
delete_state(StateNode* state, int always)
{
  int idx;

  if (state == NULL)
  {
    return;
  }

  for (idx = 0; idx < NR_classes; idx++)
  {
    Transition* head = state->trans_lists[idx];
    if (head != NULL)
    {
      delete_transition_list(head, always);
    }
  }
  free_statenode(state);
}
#else
#  define delete_state        delete_transition_list
#endif

/* Free a neg_memos table, unless there is none (NULL). */
static void
maybe_free_neg_memos(NegMemo* memos)
{
  if (memos == NULL)
  {
    return;
  }
  free_neg_memos(memos);
}

#ifdef PMRTS
/* Free a pos_memos table, unless there is none (NULL). */
static void
maybe_free_pos_memos(PosMemo* memos)
{
  if (memos == NULL)
  {
    return;
  }
  free_pos_memos(memos);
}
#endif

/*
// Destroy a complete trellis: for every position the memo tables are
// freed once, the state entry and the parts entry are deleted, and at
// the end the trellis itself is freed.
*/
void
delete_trellis(Trellis* trellis)
{
  StateNode** state_row = GET_TRELLIS_STATE_ROW(trellis);
  StateNode** parts_row = GET_TRELLIS_PARTS_ROW(trellis);
  const unsigned len = trellis->length;
  unsigned pos;
#ifdef SHOW_LEXINFO_NRS
  show_lexinfo_nrs("start of delete_trellis");
#endif
  for (pos = 0; pos < len; pos++)
  {
    StateNode* state = state_row[pos];
    StateNode* parts = parts_row[pos];
    /* The memo tables are shared between state and parts at a position
    // (if state != NULL, parts is NULL or has the same tables), so they
    // are freed through one of the two only: state if present, else parts.
    */
    StateNode* memo_owner = (state != NULL) ? state : parts;

    if (memo_owner == NULL)
    {
      continue;        /* nothing stored at this position */
    }

    maybe_free_neg_memos(memo_owner->neg_memos);
#ifdef PMRTS
    maybe_free_pos_memos(memo_owner->pos_memos);
#endif /* PMRTS */

    /* state list: stop at the shared tail; parts list: delete everything */
    delete_state(state, 0);
    delete_state(parts, 1);
  }
  free_trellis(trellis);
#ifdef SHOW_LEXINFO_NRS
  show_lexinfo_nrs("end of delete_trellis");
#endif
}

/*
//------------------------------------------------------------------------------
// Printing of trellis
//------------------------------------------------------------------------------
*/

/*
// Print the (escaped) text of a transition.
// End-of-sentence text is printed bare; any other text is printed between
// double quotes, with a leading hyphen for infixes and suffixes and a
// trailing hyphen for infixes and prefixes.
// Exits with status 2 when the escaped copy cannot be made.
*/
static void
print_terminal_text(const Transition* state)
{
  char *escaped;

  if (dupstr_escaped(state->text, &escaped)) {
	fprintf(stderr, "AGFL runtime: rtslex out of memory\n");
	exit(2); /* out of mem */
  }

  if (!is_eos_transition(state))
  {
    const int leading_hyphen = state->type & (InfixBit | SuffixBit);
    const int trailing_hyphen = state->type & (InfixBit | PrefixBit);

    if (leading_hyphen)
    {
      lex_print_formatted("\"-");
    }
    else
    {
      lex_print_formatted("\"");
    }
    lex_print_formatted("%s", escaped);
    if (trailing_hyphen)
    {
      lex_print_formatted("-\"");
    }
    else
    {
      lex_print_formatted("\"");
    }
  }
  else
  {
    lex_print_formatted("%s", escaped);
  }
  free(escaped);
}

/* We should have access to a list of (grammar & lexicon) nonterminal names.
// For now, rtslex gets the name from the lexicon through rtslint.
*/
/* Print the name that the lexicon reports for nonterminal nontnr. */
static void
current_parse_add_nonterminal(LEXICON* lex, long nontnr)
{
    /* The name is data: it must not be used as the format string itself,
     * or a '%' in a nonterminal name would be read as a conversion. */
    lex_print_formatted("%s", lexicon_get_nont_name(lex, nontnr));
}

#ifdef STANDALONE_LEXER
/* Standalone lexer: match-regexp names are not available, so a fixed
// placeholder is printed instead of match_regexp_names[termnr].
*/
static void
current_parse_add_match_regexp(long termnr)
{
  lex_print_formatted("%s", "<standalone_noname>");
}

/* Standalone lexer: skip-regexp names are not available, so a fixed
// placeholder is printed instead of skip_regexp_names[termnr].
*/
static void
current_parse_add_skip_regexp(long termnr)
{
  lex_print_formatted("%s", "<standalone_noname>");
}

/*
// One-letter tag for the kind of a parameter:
// 'S' (set), 'I' (integer), 'T' (text), or 'X' for anything else.
*/
static char
param_type_to_char(int par_kind)
{
  char tag;

  switch (GET_KIND_TYPE(par_kind))
  {
    case SetKind:
      tag = 'S';
      break;
    case IntKind:
      tag = 'I';
      break;
    case TextKind:
      tag = 'T';
      break;
    default:
      tag = 'X';
      break;
  }
  return tag;
}

/*
// Print one parameter: the nonterminal number, the one-letter kind tag
// and then the value. Set values are printed in hex; integer and text
// values are always printed as "A:INT" resp. "A:TEXT" (printing of
// concrete integer/text values is currently disabled).
*/
static void
print_param(PARAM cur_par, long nontnr)
{
  const char* fixed_text;

  lex_print_formatted("%ld %c", nontnr, param_type_to_char(cur_par.kind));

  switch (GET_KIND_TYPE(cur_par.kind))
  {
    case SetKind:
      lex_print_formatted(" x%08lx", cur_par.value.set_par);
      return;
    case IntKind:
      /* no distinction between ANY_INT and a concrete int_par for now */
      fixed_text = "A:INT";
      break;
    case TextKind:
      /* no distinction between ANY_TEXT and a concrete text_par for now */
      fixed_text = "A:TEXT";
      break;
    default:
      fixed_text = "--> Unknown param <--";
      break;
  }
  lex_print_formatted("%s", fixed_text);
}
#endif /* STANDALONE_LEXER */

#if 0
/* These functions probably needed for some debugging */
/* Print the name of a set affix below `lhs_nr` whose bitset is `set_val`.
 *
 * Walks over all set affixes of the lexicon and looks only at those for
 * which lexicon_affix_belongs_to_lhs() holds for `lhs_nr`:
 *  - if the bitset of such an affix equals `set_val`, its name is printed;
 *  - otherwise, if the bitset merely overlaps `set_val`, the search is
 *    continued recursively with that affix as the new left hand side.
 * In both cases the walk stops at the first affix that qualifies.
 * Nothing is printed if no affix qualifies.
 */
static void
lex_print_set_affix_helper(LEXICON* lex, unsigned lhs_nr, SET set_val)
{
    unsigned i, nr_aff = lexicon_get_nr_setaffixes(lex);
    for (i = 0; i < nr_aff; ++i) {
        if (lexicon_affix_belongs_to_lhs(lex, i, lhs_nr)) {
            SET bitset = lexicon_get_setaffix_bitset(lex, i);
            if (bitset == set_val) {
                /* exact match: this is the affix we are looking for */
                lex_print_formatted("%s", lexicon_get_setaffix_name(lex, i));
                return;
            } else if (bitset & set_val) {
                /* partial overlap: descend into this affix only */
                lex_print_set_affix_helper(lex, i, set_val);
                return;
            }
        }
    }
}

/* Print the name of the set affix with value `set_val` for parameter
 * `parno` of nonterminal `nontnr`.
 *
 * The affix number of the parameter is obtained from the lexicon; if its
 * bitset equals `set_val` its name is printed directly, otherwise the
 * search is delegated to lex_print_set_affix_helper().
 */
static void
lex_print_set_affix(LEXICON* lex, long nontnr, int parno, unsigned long set_val)
{
    /* OK, this is quite expensive, but we don't care for speed while
     * printing the lexical graph. */
    unsigned lhs_nr = lexicon_get_nont_param_nr(lex, nontnr, parno);
    SET bitset = lexicon_get_setaffix_bitset(lex, lhs_nr);

#ifdef DEBUG
    fprintf(stderr, "lex_print_set_affix(%p, %ld, %d, %lx) lhs_nr == %u\n",
            lex, nontnr, parno, set_val, lhs_nr);
#endif /* DEBUG */

    if (bitset == set_val) {
        /* the parameter's own affix already denotes the whole value */
        lex_print_formatted("%s", lexicon_get_setaffix_name(lex, lhs_nr));
    } else {
        lex_print_set_affix_helper(lex, lhs_nr, set_val);
    }
}
#endif

/* Print the affix `value`, selecting the printing routine by `domain`:
 * TEXT_TYPE values are printed as text, INT_TYPE values as integer, and
 * every other domain is treated as a set type (the domain is passed on to
 * the set printer).
 */
static void
print_affix(VALUE value, long domain)
{
    if (domain == TEXT_TYPE) {
        print_text_affix(value.text_par, FALSE);
        return;
    }

    if (domain == INT_TYPE) {
        print_integer_affix(value.int_par, FALSE);
        return;
    }

    /* Set type */
    print_set_affix(value.set_par, domain, FALSE);
}

/* Print a single transition of the lexical graph.
 *
 * Output, in this order:
 *  - the terminal text (print_terminal_text());
 *  - the penalty as " [n]", if it is non-zero;
 *  - for a lexicon transition: a blank, the nonterminal name and, if the
 *    arity is non-zero, its parameters as "(p1, p2, ...)";
 *    for a regexp transition: " $SKIP(\"name\")" or " $MATCH(\"name\")";
 *    nothing extra for a plain terminal or the end of sentence;
 *  - if the transition has a destination: " => pos" for a token transition
 *    or " -> pos" otherwise, with positions counted from 1.
 *
 * With DEBUG_NONTNR defined, penalty, terminal type and number are printed
 * in addition.
 *
 * The penalty is always cast to long and printed with %ld, so that the
 * conversion specification matches the argument in all variants.
 */
static void
print_transition(const Transition* state, LEXICON* lex)
{
    const StateIndicator trans_dest = get_transition_dest(state);
    int parno;
#ifdef DEBUG_NONTNR
    int stype = DECODE_TERM_TYPE(state->terminal);
#endif

    print_terminal_text(state);
    if (state->penalty) {
        lex_print_formatted(" [%ld]", (long) state->penalty);
    }
    if (is_lexicon_transition(state)) {
        int arity = DECODE_NONT_ARITY(state->terminal);
        long nontnr = DECODE_NONT_NUMBER(state->terminal);
#ifndef STANDALONE_LEXER
        /* domains of the parameters of this nonterminal, one per parameter */
        long *pdomain = nont_domains[nontnr];
#endif /* STANDALONE_LEXER */
        DB(fprintf(stderr, "print_transition of nontnr %ld with arity %d\n",
                   nontnr, arity));
#ifdef DEBUG_NONTNR
        lex_print_formatted("[%ld] %x#%ld:", (long) state->penalty, stype,
                            nontnr);
#else
        lex_print_formatted(" ");
#endif
        current_parse_add_nonterminal(lex, nontnr);

        for (parno = 0; parno < arity; parno++) {
            if (parno == 0) {
                lex_print_formatted("(");
            } else {
                lex_print_formatted(", ");
            }

#ifdef STANDALONE_LEXER
            print_param(state->params[parno], nontnr);
#else
            print_affix(state->params[parno].value, *pdomain++);
#endif /* STANDALONE_LEXER */
        }

        if (arity > 0) {
            lex_print_formatted(")");
        }
    } else {
#ifdef DEBUG_NONTNR
        lex_print_formatted("[%ld] %x#%ld:", (long) state->penalty, stype,
                            DECODE_TERM_NUMBER(state->terminal));
#endif

        if (is_skip_regexp_transition(state)) {
            lex_print_formatted(" $SKIP(\"");
            current_parse_add_skip_regexp(DECODE_REGEXP_NUMBER(state->terminal));
            lex_print_formatted("\")");
        } else if (is_match_regexp_transition(state)) {
            lex_print_formatted(" $MATCH(\"");
            current_parse_add_match_regexp(DECODE_REGEXP_NUMBER(state->terminal));
            lex_print_formatted("\")");
        } else if (is_terminal_transition(state)) {
            /* we don't want the same text again
            // (unfortunately grammar nonterminal is not available)
            // current_parse_add_terminal(DECODE_TERM_NUMBER(state->terminal));
            */
        }
        /* $end of sentence$ not shown */
    }

    if (has_transition(state)) {
#ifdef SHOW_ZERO_DEST
      if (!trans_dest) {
        lex_print_formatted(" %s ***\n",
                        (has_token_transition(state) ? "=>" : "->"));
        lex_print_formatted("*** ERROR: dest=0x%p in trans 0x%p \"%s\" ***\n",
                                                trans_dest, state, state->text);
      } else {
#endif
        /* in the output representation, positions start at 1 */
        if (has_token_transition(state)) {
            lex_print_formatted(" => %d", STATE_POS(trans_dest) + 1);
        } else {
            lex_print_formatted(" -> %d", STATE_POS(trans_dest) + 1);
        }
#ifdef SHOW_ZERO_DEST
      } /* if (!trans_dest) */
#endif

#if defined(PRINT_STATE_PTRS) && !TRANS_BY_POS
        lex_print_formatted(" (%p)", trans_dest);
#endif
    }
}

/* Print all transitions of the list starting at `state` that are accepted
 * by the predicate `do_pr`.
 *
 * `*have_printed` tells whether something has been printed before on the
 * current line: if so, each transition is preceded by ", "; otherwise the
 * flag is set when the first transition gets printed. The flag is shared
 * between successive calls, so several lists can be printed as one
 * comma separated sequence.
 */
static void
print_transition_list(Transition* state,
                      LEXICON* lex,
                      int* have_printed,
                      TransTest* do_pr)
{
    Transition* following;

    for (; state != NULL; state = following) {
        /* fetch the successor before the transition is handed out */
        following = state->next;

        if (!do_pr(state)) {
            continue;
        }

#if 0
        if (state->pos != pos1) {
            /* FN: catch possible bug, just in case */
            pos1 = state->pos;
            lex_print_formatted("\n!!%4d ", pos1 + 1);
        }
#endif
        if (*have_printed) {
            lex_print_formatted(", ");
        } else {
            *have_printed = 1;
        }

        print_transition(state, lex);
    } /* for */
}

/* Print the transitions of `state` that are accepted by `do_pr`.
 *
 * With NONT_CLASSES, a state holds one transition list per class; the
 * non-empty lists are printed in class order. Without NONT_CLASSES the
 * state itself is handed to print_transition_list() as the list.
 * `have_printed` is passed on unchanged, see print_transition_list().
 *
 * `state` must not be NULL; the caller has checked that.
 */
static void
print_state_transitions(StateNode* state,
                        LEXICON* lex,
                        int *have_printed,
                        TransTest* do_pr)
{
#if NONT_CLASSES
    int cls;

    for (cls = 0; cls < NR_classes; cls++) {
        Transition* class_list = state->trans_lists[cls];

        if (class_list != NULL) {
            print_transition_list(class_list, lex, have_printed, do_pr);
        }
    }
#else
    print_transition_list(state, lex, have_printed, do_pr);
#endif
}

/*
// Top level routine for printing the lexical graph.
// At each input position, there are two lists of transitions
// (each of which may be empty):
// A. the regular list, which will be used when a token transition
//                leads to this position; it contains the symbols
//                that may appear at the start of a token.
//                This list is only built if lexicalize() is called
//                for this position, i.e. if complete tokens can
//                start here.
//                These transitions have the token_start flag.
// B. the parts list, which will be used when a parts transition
//                leads to this position; it contains the symbols
//                that may appear in a parts_token.
//                This list is built both through add_parts_transitions()
//                (scanning for parts in the middle of a word)
//                and through lexicalize().
//                These transitions have the (live_)token_part flag.
// Some parts are in both lists:
//        those that can be the first part of a token
//        i.e. single tokens, prefix parts, and infix parts
//        (provided this pos is a place where words can start).
//        These parts are moved to the end of the parts list,
//        and the pointer to the first of them is copied to
//        the end of the regular list, thus resulting in a shared tail.
//        These transitions have both the token_start and 
//        the (live_)token_part flag.
// The transitions are printed in the following order:
//        1. those that are only in the regular list
//        2. those that are only in the parts list
//        3. those that are in both lists
// If PRINT_PARTS_SEMICOLON is #defined (in DEBUG mode),
//        a ';' is printed between 1. and 2. and between 2. and 3.,
//        in order to show the data structure in greater detail.
// 
*/
void
print_trellis(const Trellis* trellis)
{
    LEXICON* lex = trellis->lexicon;
#if NONT_CLASSES
    StateNode* const* state_row = trellis->states_row;
    StateNode* const* parts_row = trellis->pstates_row;
#else
    State* const* state_row = trellis->state_row;
    State* const* parts_row = trellis->parts_row;
#endif
    unsigned len = trellis->length;
    unsigned pos;
    /* Separator written after the transitions of one position.
     * It is always printed through "%s", never used as a format itself. */
    const char *out_line_sep;
#ifdef SHOW_LEXINFO_NRS
    show_lexinfo_nrs("start of print_trellis");
#endif
    /* In trellis input format all positions go on one line, separated by
     * tabs; otherwise each position gets a line of its own. */
    if (lex_out_format == G_OUT_FORMAT_TRELLIS_INPUT) {
        out_line_sep = "\t";
    } else {
        out_line_sep = "\n";
    }
    for (pos = 0; pos < len; pos++) {
        StateNode* state = *state_row++;
        StateNode* pstate = *parts_row++;
        int did_print = 0;

        if (state != NULL) {
            /* in the output representation, positions start at 1 */
            lex_print_formatted("%4u ", pos + 1);
            /* print nonshared (first) part of state_row lists: */
#ifdef PRINT_STATE_PTRS
            lex_print_formatted("(%p) ", (void *) state);
#endif
            print_state_transitions(state, lex, &did_print,
                                    &not_live_token_part);
            if (pstate != NULL) {
#ifdef PRINT_PARTS_SEMICOLON
                lex_print_formatted(";");
#endif
#ifdef PRINT_STATE_PTRS
                lex_print_formatted("(%p) ", (void *) pstate);
#endif
                /* print nonshared (first) part of parts_row list: */
                print_state_transitions(pstate, lex, &did_print,
                                        &not_token_start);
#ifdef PRINT_PARTS_SEMICOLON
                lex_print_formatted(";");
#endif
                /* print shared (last) part of both lists: */
                print_state_transitions(state, lex, &did_print,
                                        &is_live_token_part);
            }

            lex_print_formatted("%s", out_line_sep);
        } else if (pstate != NULL) {
#ifdef PRINT_PARTS_SEMICOLON
            lex_print_formatted("%4u;  ", pos + 1);
#else
            lex_print_formatted("%4u ", pos + 1);
#endif
#ifdef PRINT_STATE_PTRS
            lex_print_formatted("(%p) ", (void *) pstate);
#endif
            /* print whole parts_row list (there is no state_row list) */
            print_state_transitions(pstate, lex, &did_print, &trans_true);
            lex_print_formatted("%s", out_line_sep);
        }
    }
    /* terminate the single line of the trellis input format */
    if (lex_out_format == G_OUT_FORMAT_TRELLIS_INPUT) {
        lex_print_formatted("\n");
    }

    fflush(stdout);
#ifdef SHOW_LEXINFO_NRS
    show_lexinfo_nrs("end of print_trellis");
#endif
}

/* Forward declaration: may_get_shorter_trans_from_list() and
** may_get_shorter_trans_from_state() call each other.
*/
static void
may_get_shorter_trans_from_state(StateNode const * istate,
				 Position * least_next_pos_p,
				 StateNode * * shortest_p);

static void
may_get_shorter_trans_from_list(Transition const * trans_list,
	      Position * least_next_pos_p, StateNode * * shortest_p)
/* Scan one transition list for a destination that lies before
** *least_next_pos_p. If one is found, *shortest_p is changed into that
** StateNode and *least_next_pos_p into its position.
** Transitions whose destination is not before *least_next_pos_p are
** ignored.
*/
{
#if TRANS_BY_POS
#error may_get_shorter_trans_from_list() not implemented for TRANS_BY_POS case
#else /* not TRANS_BY_POS */
    Transition const * trans;

    for (trans = trans_list; trans != NULL; trans = trans->next) {
        StateNode * dest = trans->TDEST_FIELD;

        if (!(STATE_POS(dest) < *least_next_pos_p)) {
            /* no improvement possible through this transition */
            continue;
        }

        if (has_token_transition(trans)) {
            /* A complete token ends at dest: new nearest destination. */
            *shortest_p = dest;
            *least_next_pos_p = STATE_POS(dest);
        } else {
            /* This is a transition to a following part.
            ** Look for the shortest sequence of parts
            ** (of which only the last one has_token_transition).
            */
            may_get_shorter_trans_from_state(dest, least_next_pos_p,
                                             shortest_p);
        }
    } /* for */
#endif /* not TRANS_BY_POS */
} /* may_get_shorter_trans_from_list */

static void
may_get_shorter_trans_from_state(StateNode const * istate,
				 Position * least_next_pos_p,
				 StateNode * * shortest_p)
/* Scan all transition lists of istate for a destination that lies before
** *least_next_pos_p, updating *shortest_p and *least_next_pos_p on success.
** The lists of the regexp classes ($MATCH and $SKIP) are only scanned if
** *shortest_p is still NULL after the other classes have been scanned.
*/
{
#if NONT_CLASSES
    int cls;

    assert(istate != NULL);

    /* not using lex_info->nr_nont_classes, as it isn't yet filled correctly */
    for (cls = 0; cls < NR_classes; cls++) {
        if ((cls == re_match_class) || (cls == re_skip_class)) {
            /* regexp classes are handled below, as a last resort */
            continue;
        }
        may_get_shorter_trans_from_list(istate->trans_lists[cls],
                                        least_next_pos_p, shortest_p);
    } /* for cls */

    if (*shortest_p == NULL) {
        may_get_shorter_trans_from_list(istate->trans_lists[re_match_class],
                                        least_next_pos_p, shortest_p);
        may_get_shorter_trans_from_list(istate->trans_lists[re_skip_class],
                                        least_next_pos_p, shortest_p);
    }
#else /* no NONT_CLASSES */
#error may_get_shorter_trans_from_state() only implemented for NONT_CLASSES case
#endif /* NONT_CLASSES */
} /* may_get_shorter_trans_from_state */

StateNode*
get_shortest_transition(Trellis const * trellis, StateNode const * istate)
/* Find the nearest StateNode (i.e. the one with the lowest pos) that can
** be reached from istate after a full transition (e.g. a single token or a
** sequence of parts). Regexp ($MATCH or $SKIP) transitions are only
** considered if the other transitions did not yield a destination.
**
** If istate has an EOS transition, no search is done and NULL is returned.
**
** Return value: NULL if no destination was found (or in the EOS case),
** pointer to the destination StateNode otherwise.
*/
{
    StateNode *nearest = NULL;
    Position pos_limit;

    assert(trellis);
    assert(istate);

    if (!state_has_eos_transition(istate)) {
        pos_limit = trellis->length + 1; /* > any nextpos */
        may_get_shorter_trans_from_state(istate, &pos_limit, &nearest);
    }

    return nearest;
} /* get_shortest_transition */

/* TODO: initialize neg_memos using director-sets*/

/* Build the trellis for `input`, using lexicon `the_lex`.
 *
 * A trellis of strlen(input) + 1 entries is allocated and initialised, the
 * lexicon is attached to it, and the input is lexed into it. After that
 * the negative memos (and, with PMRTS, the positive memos) are added.
 * The order of these steps is significant.
 *
 * Returns the new trellis.
 */
Trellis*
make_trellis_by_word_lexing(const char* input, LEXICON* the_lex)
{
    unsigned nr_entries = strlen(input) + 1;
    Trellis* trellis = alloc_trellis(nr_entries);

#ifdef SHOW_LEXINFO_NRS
    show_lexinfo_nrs("start of make_trellis_by_word_lexing");
#endif

    trellis->lexicon = the_lex;
    init_trellis(trellis, nr_entries);

#ifdef COUNT_TRACE
    n_trel_builds++;
#endif

    build_trellis(trellis, (unsigned char*)input);

#if !NONT_CLASSES
    /* Extra pass state-types -> state-terminal bits; expand lexicon states.
     * Only needed when the states are not organised in nonterminal classes.
     */
    add_trellis_entries(trellis, the_lex);
#endif

    /* Add neg_memo vector to all states at the same position.
     * If directors_option, block neg_memos that are not possible according
     * to the directors of the neg_memos.
     * Note that open_neg_memos_for_state() expects the lexicon states to
     * be expanded.
     */
    add_trellis_neg_memos(trellis);
#ifdef PMRTS
    add_trellis_pos_memos(trellis);
#endif /* PMRTS */

#ifdef SHOW_LEXINFO_NRS
    show_lexinfo_nrs("end of make_trellis_by_word_lexing");
#endif
#if 0
TODO_dont_print_shortest:
    {
	StateNode *test=get_shortest_transition(trellis, trellis->first_state);
	rtsMessage("shortest %p\n", test);
	if (test) {
	    /* in output representation, positions start at 1 */
	    rtsMessage(".. pos %d\n", STATE_POS(test) + 1);
	}
    }
#endif /* print_shortest */

    return trellis;
}

/* Tell whether `the_state` has an end-of-sentence transition.
 * Only the first transition of the grammar terminal class list is
 * inspected. Returns 1 if it is an EOS transition, 0 otherwise (also when
 * that list is empty).
 */
int
state_has_eos_transition(StateNode const * the_state)
{
    Transition* first_gr_trans = the_state->trans_lists[gr_term_class];

    if (first_gr_trans == NULL) {
        return 0;
    }

    return is_eos_transition(first_gr_trans) ? 1 : 0;
}

int
is_empty_trellis(const Trellis* trellis)
{
    return state_has_eos_transition(GET_FIRST_STATE_PTR(trellis));
}
