/*
   File: lexicon_search.c
   Defines the routines to search in the lexicon

   Copyright 2005 Radboud University of Nijmegen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Library General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   CVS ID: "$Id: lexicon_search.c,v 1.28 2007/10/17 11:52:25 marcs Exp $
*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */

/*
 * USE_OLD_SEARCH_CODE
 * When non-zero, this flag (re)activates the old search code (pre 5/4/2005),
 * in which the matching automaton is implicit in one loop.
 *
 * When zero, it activates the new search code instead (change: the new
 * code drives the search with an explicit DFA, which was embedded in the
 * old code).
 */
#define USE_OLD_SEARCH_CODE 1

/*
 * MaxLexLen is the max number of TrieFrame's on the TrieData stack; the
 * lexeme buffer of TrieData is dimensioned with it as well (1 + MaxLexLen).
 */
enum { MaxLexLen = 1024 };

/* standard includes */
#include <assert.h>

/* includes from the abase lib */
#include <abase_memalloc.h>
#include <abase_error.h>

/* local includes */
#include "lexicon.h"
#include "lxcn_input.h"
#include "lexicon_search.h" 

/*------------------------------------------------------------------------------
//
// Trie structure:
// The type of a trie is defined as `void *' on some occasions.
// For this file, a trie is defined as `unsigned char *'.
//
// It is not possible to state the trie structure with valid C-code so we
// use broken C-code to explain the idea.
//
// typedef struct {
//   // a header of two bytes.
//   unsigned char trie_path_sz;          size of a shared prefix
//   unsigned char trie_size_sz;          size (or number of characters) in the trie
//   
//   // the content for this node.
//   unsigned char path[trie_path_sz];    text of the shared prefix
//   unsigned char line[trie_size_sz];    for each choice, one character
//					  in sorted order, no `NULL' choices
//
//   // and a collection of offsets to child nodes.
//   unsigned char alignment[0-7] = '\0'; alignment bytes needed to get offsets[] at word boundary
//   unsigned offset[trie_size_sz];       for each choice, the absolute offset to the next trie node
// } trie_node;
// trie_node *trie;
//
//----------------------------------------------------------------------------*/

/* Size in bytes of one slot in the offset table of a trie node. */
enum { WORD_SZ = sizeof(void *) };

/* Positions of the two header bytes of a trie node, and the header size. */
enum { TRIE_PATH_IDX = 0, TRIE_SIZE_IDX = 1, TRIE_HEADER_SZ = 2 };

/*
 * Round p up to the nearest address that is a multiple of WORD_SZ.
 * An address that already is such a multiple is returned unchanged.
 */
static unsigned char* align (unsigned char* p)
{ size_t excess = (size_t) (((unsigned long) p) % WORD_SZ);

  if (excess == 0)
    return (p);
  return (p + (WORD_SZ - excess));
}

/*------------------------------------------------------------------------------
// Function:
//        int lxcn_bin_search(unsigned char c, const unsigned char* p,
//                            unsigned size)
//
// Description:
//        Binary search for c in the first `size' bytes of array p, which
//        must be sorted in ascending order. An empty range (size == 0) is
//        handled without reading from p and yields "not found".
//
// Return value:
//        Index of c in array, or -1 else.
//
// We include the binary search routine here: it is used nowhere else
//----------------------------------------------------------------------------*/
static int lxcn_bin_search (unsigned char c, const unsigned char* p, unsigned size)
{ unsigned low = 0;
  unsigned high = size;

  /* [low, high) is the part of p that may still hold c; test it BEFORE
     reading so that an empty range never touches p[0] */
  while (low < high)
    { unsigned mid = low + (high - low) / 2;
      unsigned char d = p[mid];
      if (c < d)
        high = mid;
      else if (c > d)
        low = mid + 1;
      else
        return ((int) mid);
    }
  return (-1);
}

/*------------------------------------------------------------------------------
// Type:
//        TrieFrame, TrieData
//
// Description:
//        A TrieData structure contains the parameters of the trie search
//	  process, and a stack with TrieFrame stack frames representing states
//        of the search process. Initially, one new frame is pushed onto
//        the stack, and each time the search process forks, another frame
//        is pushed. If a search branch fails, or is exhausted, its frame
//        is popped from the stack. If searching is successful, the state
//        of the searching process is stored in the frame, and the results
//        are stored in lxcn_TrieData.
//
//	The data structure lxcn_TrieData is not available outside
//	this file but its name is. (See lexicon_search.h for details.)
//
//	2000-05-01 FN (adding forking if translation available)
//	Since we can no longer rely on the last char in the lexeme_buffer
//	(it may be the untranslated char), the correct char now lives
//	in the frame, and the position (last_pos, inside the lex_buf)
//	where it should go is put there too, instead of lex_end
//	(which is derived after restoring).
//----------------------------------------------------------------------------*/

/* One saved state of the search process (one entry of the frame stack). */
typedef struct
{ unsigned       offset;         /* offset of the trie node to continue with */
  unsigned char *input;          /* input position to continue reading at */
  unsigned char *lex_last_pos;   /* where lex_last_char goes in the lexeme buffer */
  unsigned char  lex_last_char;  /* character to store at lex_last_pos */
#if !USE_OLD_SEARCH_CODE
  unsigned char *node;           /* position inside the trie node (new search code only) */
#endif
} TrieFrame;

/* Search parameters, results of the last match, and the frame stack. */
struct TrieData_rec
{ unsigned char  lex_mark;                /* lexeme-type marker still to be matched */
  unsigned char *lex_end;                 /* points beyond the last matched lexeme */
  unsigned       info;                    /* info of the last matched lexeme */
  TrieFrame     *sp;                      /* next free frame of stack */
  TrieFrame      stack[MaxLexLen];        /* the frame stack itself */
  unsigned char  lex_buf[1 + MaxLexLen];  /* receives the text of the matched lexeme */
  unsigned char *lex_begin;               /* start of the matched lexeme in lex_buf */
};


/* NOTE(review): presumably the bound for approximate matching by edit
   distance; nothing in the code visible in this file reads or writes it. */
int max_edit_distance = 0;

/*
 * Store a search state in frame fp: the trie offset and input position to
 * continue with, plus the character (and its destination in the lexeme
 * buffer) that is written when the frame is resumed.
 */
static void save_frame (TrieFrame* fp, unsigned offset, unsigned char* input, 
			unsigned char* lex_last_pos, unsigned char lex_last_char 
#if !USE_OLD_SEARCH_CODE 
  , unsigned char* node 
#endif
  )
{
#if !USE_OLD_SEARCH_CODE
  fp->node = node;
#endif
  fp->lex_last_char = lex_last_char;
  fp->lex_last_pos = lex_last_pos;
  fp->input = input;
  fp->offset = offset;
}

#if USE_OLD_SEARCH_CODE  
/* Copy the search state stored in frame fp into the caller's variables. */
static void restore_frame (TrieFrame* fp, unsigned* p_offset, unsigned char** p_input, 
			   unsigned char** p_lex_last_pos, unsigned char* p_lex_last_char)
{
  *p_lex_last_char = fp->lex_last_char;
  *p_lex_last_pos = fp->lex_last_pos;
  *p_input = fp->input;
  *p_offset = fp->offset;
}

#else /* !USE_OLD_SEARCH_CODE */

/* Copy every field of the stored frame fm into the working frame to. */
static void restore_frame (TrieFrame* fm, TrieFrame* to)
{
  /* a TrieFrame has exactly the five fields that were copied one by one */
  *to = *fm;
}

/*
 * Read the header of the trie node at `offset' and report its path length
 * and line size. If *node is NULL it is set to the first content byte of
 * the node; otherwise *node is kept and the path length is reduced by the
 * distance between *node and the first content byte (never below zero).
 */
static void reconstruct_trie_position (Trie trie, unsigned offset, int *path, 
				       unsigned char** node, unsigned *size)
{ unsigned char* header = (unsigned char*)trie + offset;
  unsigned char* content = header + TRIE_HEADER_SZ;

  *path = header[TRIE_PATH_IDX]; /* length of compressed path */
  *size = header[TRIE_SIZE_IDX]; /* number of subtries */

  if (*node == NULL)
    { *node = content;
      return;
    }

  /* position already inside the node: only the rest of the path remains */
  *path -= *node - content;
  if (*path < 0)
    *path = 0;
}
#endif

/* Return the most recently pushed frame, or NULL when the stack is empty. */
static TrieFrame *get_top_frame (TrieData data)
{ if (data -> sp == data -> stack)
    return (NULL);
  return (data -> sp - 1);
}

/*
 * Claim the next free frame of the stack and return it; its contents are
 * not initialised here. Overflow is only caught by the assertion.
 */
static TrieFrame* push_frame (TrieData data)
{ TrieFrame* claimed;

  assert ((data -> sp < &data -> stack[MaxLexLen]) && "push_frame: stack overflow.");
  claimed = data -> sp;
  data -> sp = claimed + 1;
  return (claimed);
}

/* Drop the top frame of the stack; an empty stack is only caught by the assertion. */
static void pop_frame (TrieData data)
{ assert((data -> sp > data -> stack) && "pop_frame: empty stack.");
  data -> sp -= 1;
}

/*
 * Record a matched prefix in data: where the lexeme ends and the info that
 * belongs to it. Note that the long info is stored in an unsigned field.
 */
static void mark_prefix (TrieData data, long info, unsigned char* lex_end)
{
  data -> lex_end = lex_end;
  data -> info = info;
}

/*-----------------------------------------------------------------------------
// Public parts:
//---------------------------------------------------------------------------*/

/*
 * Allocate and initialise the search data for matching `input' against a
 * trie. One initial frame is placed on the stack: it starts at trie offset
 * 0 and makes the first resume write '\0' into lex_buf[0], so the lexeme
 * text itself starts at lex_buf + 1 (lex_begin). The marker for lex_type
 * is stored so that it gets matched before any input.
 *
 * The result fields lex_end and info are given defined values ("empty
 * lexeme", info 0) so that lxcn_get_lex_end and lxcn_get_info are safe to
 * call before the first successful match.
 *
 * The returned data must be released with lxcn_exit_approx_match_trie.
 */
TrieData lxcn_init_approx_match_trie (unsigned char* input, LexemeType lex_type)
{ TrieData data = (TrieData) abs_malloc (sizeof (struct TrieData_rec), "lxcn_init_trie_data");

  data -> lex_begin = data -> lex_buf + 1;
  data -> lex_end = data -> lex_begin;	/* nothing matched yet */
  data -> info = 0;
#if USE_OLD_SEARCH_CODE 
  save_frame (data -> stack, 0, input, data -> lex_buf, '\0');
#else
  save_frame (data -> stack, 0, input, data -> lex_buf, '\0', NULL);
#endif
  data -> sp = data -> stack + 1;
  data -> lex_mark = lxcn_get_lex_mark (lex_type);

  return (data);
}

/* Release search data obtained from lxcn_init_approx_match_trie. */
void lxcn_exit_approx_match_trie (TrieData trie_data)
{
  abs_free (trie_data, "lxcn_exit_trie_data");
}

/* Return the start of the matched lexeme text in the search data's buffer. */
unsigned char * lxcn_get_lex_begin (TrieData trie_data)
{ unsigned char *begin = trie_data -> lex_begin;

  return (begin);
}

/* Return the position just beyond the last matched lexeme text. */
unsigned char *lxcn_get_lex_end (TrieData trie_data)
{ unsigned char *end = trie_data -> lex_end;

  return (end);
}

/* Return the info recorded for the last matched lexeme, converted to int. */
int lxcn_get_info (TrieData trie_data)
{ unsigned stored = trie_data -> info;

  return ((int) stored);
}

/*------------------------------------------------------------------------------
// Function:
//        unsigned char* lxcn_approx_match_trie(Trie trie, TrieData data)
//
// Description:
//        Search next lexeme in trie, beginning in the state that is stored
//        on top of the search stack. If a non-empty prefix can be matched,
//        its related info is stored in data. At the first invocation, any
//        marker indicating a lexeme type in frame is matched, and set 
//        to empty. A blank in the trie matches one or more blanks in the
//        input. If a character cannot be matched literally, it is tried
//        to match its translation. If both a literal character and its
//        translation can be matched, the search process is forked by pushing
//        a new frame representing the second branch on the stack.
//        If a prefix has been matched, it is checked whether it should be
//        followed by a terminator. While matching input, the text of the
//        matched lexeme in the trie is copied into the buffer in data.
//
// Return value:
//        A pointer to the first unmatched character, if a non-empty prefix
//        could be matched, or NULL else.
//
// To do:
//        Branch and bound for minimal edit distance;
// Done: always fork search. (2000-05-01)
//	(FN: I hope "in case of translation" is meant by "always")
//----------------------------------------------------------------------------*/

#if !USE_OLD_SEARCH_CODE 

/*------------------------------------------------------------------------------
//
// matching with a FA
//
// Matching input against the lexicon depends on the state of the input
// (is there a lexicon mark or not?) and the area of the lexicon where
// to look (looking up in the prefix path or in the trie line?) This
// can be seen as two independent FA's but are combined into four states.
//
// Then there is some overhead for the matching such as what to do when
// a match is found (or the failing of a match is clear) and what to do
// when we need to go to the next node in the lexicon. These are as well
// registered as separate states (although you might argue that these
// so called `states' are not event driven but just results from the
// previous events. In this implementation actions are coupled to 
// states and not to events.)
//
// Matching is done with the help of the following states:
// Input:          (These states must be combined with the lexicon
//                 sPATH and sLINE states to form 4 different states)
//   sLEXMARK:     A lexicon marker has been specified which must
//                 be matched first.
//   sNO_LEXMARK:  Start eating the input stream, no special markers.
//
// Lexicon:        (see also: comments in lexicon_trie.h for clarification
//                 of the lexicon trie structure.)
//                 (The first two states can be combined with sLEXMARK and
//                 sNO_LEXMARK states to form 4 different states)
//   sPATH:        In a (shared) prefix for which no new lexicon node needs
//                 to be used. There is a choice of 1 out of 1 characters
//                 to match.
//   sLINE:        Get a character from input to match with the line.
//   sLOOKUP:      In the lexicon trie line, there is a choice of 1 out of
//                 n (possibly 0) characters. Each choice is followed up by
//                 a new lexicon node.
//   sFETCH:       (Internal event, a match in sLINE) Look up the next 
//                 lexicon nodes.
//
// Extra states:   (All internal events)
//   sFAIL:        Input stream and Lexicon do not match up to one lexeme.
//                 Just abort the match, pop this frame and start with
//                 a new frame.
//   sPACCEPT:     Preparations for sACCEPT. Just popping the current frame.
//   sACCEPT:      Found a match, return the results.
//
// Entrance states:
// On start of the DFA it has one of the following states:
//    sPATH + sNO_LEXMARK (assumed to be identical to sPATH)
//    sPATH + sLEXMARK
//    sLINE + sNO_LEXMARK (assumed to be identical to sLINE)
//    sLINE + sLEXMARK
//
// Exit states:
// Leaving the automaton is only possible via the following accepting states:
//    sFAIL
//    sACCEPT
//----------------------------------------------------------------------------*/

/*
 * States of the matching automaton. The input states and the lexicon
 * states sPATH/sLINE are added together to form the combined entry
 * states, so their numeric values matter; all values are spelled out.
 */
typedef enum {
   /* input state */
   sNO_LEXMARK =  0, /* normal input, its value of 0 is assumed in other parts of the code. */
   sLEXMARK    =  1, /* input overruled by lex_mark */

   /* lexicon state */
   sPATH       =  2, /* in lexicon path part */
   sLINE       =  4, /* in lexicon trie line */

   /* non final states */
   sLOOKUP     =  8, /* lookup a character in the line. */
   sFETCH      =  9, /* fetch next lexicon node */

   /* final states */
   sFAIL       = 10, /* found no match */
   sPACCEPT    = 11, /* found a match but pop frame first. */
   sACCEPT     = 12  /* found a match */
} state;

/* Append character c to the lexeme text of frame f. */
#define accept(f,c) (*(f)->lex_last_pos++ = (c))

/*
 * Mark the lexeme-type marker as consumed, both in the caller's working
 * copy and in the search data. The frame argument is not used.
 */
inline static void skip_lexmark (TrieFrame *f, unsigned char *lex_mark, TrieData data)
{
   (void) f;
   *lex_mark = EmptyMark;
   data->lex_mark = *lex_mark;
}

/* Current input character of frame f, and a one-character advance. */
#define lookup_input(f) (*(f)->input)
#define step_input(f) ((f)->input++)

/*
 * Consume the current input character and any blanks that directly follow
 * it. Nothing is consumed when the input is at end-of-string.
 */
inline static void skip_input (TrieFrame *f)
{
   if (lxcn_is_eos(lookup_input (f))) return;

   step_input (f);
   while (lxcn_is_blank(lookup_input (f))) {
      step_input (f);
   }
}

/* Current character of the trie node of frame f (path part or line part). */
#define lookup_path(f) (*(f)->node)
#define lookup_line(f) (*(f)->node)

/* Advance one character in the path part and count it off the path length. */
inline static void
skip_path (TrieFrame *f, int *path)
{
   *path -= 1;
   f->node += 1;
}

/*
 * State sLEXMARK + sPATH: the first path character must equal the
 * lexeme-type marker. On a match the marker is appended to the lexeme
 * text, marked as consumed, and one path character is skipped.
 *
 * Returns sFAIL on a mismatch, sLINE when the path is exhausted by the
 * marker, and sPATH when path characters remain.
 */
inline static state
stateLexmarkPath (
   TrieFrame     *f, 
   unsigned char *lex_mark,
   int           *path,
   TrieData      data)
{
   if (lookup_path(f) != *lex_mark) {
      return sFAIL;
   }

   accept (f, *lex_mark);
   skip_lexmark (f, lex_mark, data);
   skip_path (f, path);

   if (*path == 0) {
      return sLINE;
   }
   return sPATH;
}

/*
 * State sPATH: match the remaining path characters of the node against
 * the input. A path character matches the input character itself or its
 * translation. Every matched path character is appended to the lexeme
 * text; *next_char holds the last input character that was inspected.
 *
 * Returns sLINE when the whole path has been matched, sFAIL otherwise.
 */
inline static state
statePath (
   TrieFrame     *f, 
   int           *path,
   unsigned char *next_char)
{
   while (*path != 0) {
      unsigned char trie_ch = lookup_path(f);

      *next_char = lookup_input(f);
      if ((trie_ch != *next_char) && (trie_ch != lxcn_translate(*next_char))) {
         return sFAIL;
      }
      skip_path (f, path);
      skip_input (f); 
      accept (f, trie_ch);
   }
   return sLINE; 
}

/*
 * State sLEXMARK + sLINE: the lexeme-type marker itself becomes the
 * character to look up in the trie line; no input is consumed. The marker
 * is then marked as consumed and *success is cleared.
 *
 * fp, index and size are not used here. Always returns sLOOKUP.
 */
inline static state
stateLexmarkLine (
   TrieFrame     *f, 
   TrieFrame     *fp, 
   unsigned char *next_char,
   unsigned char *lex_mark,
   unsigned char **success,
   int           *index,

   TrieData	 data,
   unsigned      size)
{
   /* the marker must be read before skip_lexmark empties it */
   *next_char = *lex_mark;
   *success = NULL;
   skip_lexmark (f, lex_mark, data);

   return sLOOKUP;
}

/*
 * State sLINE: first check whether the line starts with the end-of-string
 * choice; if so a lexeme ends here, its info (read from the first aligned
 * slot behind the line) and end position are recorded in data, and
 * *success is set to the current input position. Then the next input
 * character is fetched into *next_char and consumed; any kind of blank is
 * normalised to ' '.
 *
 * Returns sLOOKUP when there is a character to look up. At end of input
 * it returns sPACCEPT if a lexeme was recorded and sFAIL otherwise.
 * fp and index are not used here.
 */
inline static state
stateLine (
   TrieFrame      *f, 
   TrieFrame      *fp, 
   unsigned char  *next_char,
   unsigned char  **success,
   int            *index,

   TrieData       data,
   unsigned       size
   )
{
   if (lxcn_is_eos(lookup_line(f))) {
      long info = *(long*)align(f->node + size);

      mark_prefix(data, info, f->lex_last_pos);
      *success = f->input;
   } else {
      *success = NULL;
   }

   *next_char = lookup_input(f);
   skip_input(f);

   if (lxcn_is_eos(*next_char)) {
      if (*success != NULL) return sPACCEPT;
      return sFAIL;
   }
   /* mind you, lxcn_is_blank does not only check for ' '! */
   if (lxcn_is_blank(*next_char)) {
      *next_char = ' ';
   }
   return sLOOKUP;
}

/*
 * State sLOOKUP: look up *next_char, and its translation when that is a
 * different character, in the trie line of the current node.
 *
 *  - neither is present: sPACCEPT if a lexeme was recorded, else sFAIL;
 *  - only the translation is present: continue with the translation
 *    (*next_char and *index are replaced);
 *  - both are present: the translated alternative is saved in frame fp and
 *    a new frame is pushed; the search continues with the literal one.
 *
 * Returns sFETCH whenever there is a branch to follow.
 *
 * NOTE(review): fp is a by-value parameter, so the frame returned by
 * push_frame is not visible to the caller through this argument.
 * NOTE(review): the saved alternative combines the child's offset with the
 * current node position f->node - confirm that this is what resuming the
 * frame expects.
 */
inline static state
stateLookup (
   TrieFrame     *f, 
   TrieFrame     *fp, 
   unsigned char *next_char,
   unsigned char **success,
   int           *index,

   TrieData	 data,
   unsigned      size)
{
   *index = lxcn_bin_search(*next_char, f->node, size);

   if (*next_char == lxcn_translate(*next_char)) {
      /* no distinct translation: only the literal character counts */
      if (*index >= 0) return sFETCH;
      return (*success != NULL) ? sPACCEPT : sFAIL;
   }

   {
      int tndex = lxcn_bin_search(lxcn_translate(*next_char), f->node, size);

      if (*index < 0) {
         if (tndex < 0) {
            /* neither path available */
            return (*success != NULL) ? sPACCEPT : sFAIL;
         }
         /* only translated path available */
         *next_char = lxcn_translate(*next_char);
         *index = tndex;
      } else if (tndex >= 0) {
         /* both paths available */
         unsigned tffset = *(long*)align(f->node + size + WORD_SZ * tndex);

         save_frame(fp, tffset, f->input, f->lex_last_pos, lxcn_translate(*next_char), f->node);
         fp = push_frame(data);
      }
      /* else: only the untranslated path is available, nothing to do */
   }
   return sFETCH;
}

/*
 * Derive the entry state of the automaton for a (re)constructed trie
 * position: the lexicon part is sPATH while path characters remain and
 * sLINE otherwise; sLEXMARK is added when a lexeme-type marker still has
 * to be matched.
 *
 * The former test on f->action_map / ACTION_MASK (yielding sIDR or sIDRn)
 * has been dropped: TrieFrame has no such member and the state enum has no
 * such values, so that line could not be compiled. The frame argument is
 * kept for the callers but is not used.
 */
static state reconstruct_state (
   TrieFrame     *f, 
   int           path,
   unsigned char lex_mark)
{
   state lexicon_part = (path > 0) ? sPATH : sLINE;
   state input_part = (lex_mark != EmptyMark) ? sLEXMARK : sNO_LEXMARK;

   (void) f;
   return (state) (lexicon_part + input_part);
}

/*
 * State sFETCH: read the offset of the child node that belongs to choice
 * `index' of the current line. If a lexeme was recorded (*success is set),
 * the continuation (child offset, input position, and next_char as the
 * character still to be written) is saved in frame fp and sACCEPT is
 * returned. Otherwise next_char is appended to the lexeme text, the
 * working frame moves to the child node, and the entry state for that
 * node is returned.
 */
inline static state
stateFetch (
   TrieFrame     *f, 
   TrieFrame     *fp, 
   int           *path,
   unsigned char **success,
   unsigned      *size,

   Trie 	 trie,
   unsigned char lex_mark,
   unsigned char next_char,
   int           index)
{
   unsigned char *slot = align(f->node + *size + WORD_SZ * index);

   f->offset = *(long*)slot;
   f->node = NULL;	/* position inside the child is not known yet */

   if (*success != NULL) {
      save_frame(fp, f->offset, f->input, f->lex_last_pos, next_char, NULL);
      return sACCEPT;
   }

   accept(f, next_char);	/* Ftrans moved here */
   reconstruct_trie_position (trie, f->offset, path, &f->node, size);
   return (reconstruct_state (f, *path, lex_mark));
}

/*
 * Search the next lexeme in trie, driven by an explicit state machine.
 * The search resumes from the frame on top of the stack in data; when a
 * branch fails its frame is popped and the next frame is tried.
 *
 * Returns the input position recorded for the matched lexeme, or NULL
 * when no frame is left to try.
 */
unsigned char* lxcn_approx_match_trie (Trie trie, TrieData data)
/* returns NULL if there are no (more) matches by any remaining frame */
{  TrieFrame      frame, *f = &frame;
   TrieFrame*     fp;        /* which frame from data to use? */
   state          s;         /* phase of the matching process */
   int            path;      /* size of lexicon path part */
   unsigned       size;      /* size of lexicon trie line */
   unsigned char  lex_mark;  /* working copy from data */

   /* initialised to stop nagging by the compiler. */   
   int            index = -1;       /* found a match in the trie line */
   unsigned char* success = NULL;   /* found a matching lexeme */
   unsigned char  next_char = '\0'; /* next char from input stream */

try_from_top_frame:

   fp = get_top_frame(data);
   if (fp == NULL) return NULL;

   /* frame actually contains lex_last_pos, not lex_end */
   restore_frame(fp, f);
   if (f->lex_last_char != '\0') *f->lex_last_pos++ = f->lex_last_char;
   
   lex_mark = data->lex_mark;
   reconstruct_trie_position (trie, f->offset, &path, &f->node, &size);
   s = reconstruct_state (f, path, lex_mark);

   while (1) {
      switch (s) {
      case sFAIL:
         /*
          *  accept, no match (try next frame).
          */
         pop_frame(data);
         goto try_from_top_frame;

      case sPACCEPT:
         pop_frame(data);
         /* and fall through */
      case sACCEPT:
         /*
          * accept, with match (inform caller).
          */
         return success; 

      case sLEXMARK + sLINE:
         /*
          * entry
          *
          * next_char = lex_mark (for LOOKUP state)
          * a success mark is set on a match which is used in the FETCH state
          */
         s = stateLexmarkLine (f, fp, &next_char, &lex_mark, &success, &index, data, size);
         break;

      case sLEXMARK + sPATH:
         /*
          * entry
          * lexmark == *path, |path| > 0 after the mark -> sPATH
          * lexmark == *path, |path| = 0 after the mark -> sLINE
          * lexmark != *path                            -> sFAIL
          */
         s = stateLexmarkPath (f, &lex_mark, &path, data);

         if (s != sPATH) break;
         /* and fall through */
      case sPATH:
         /*
          * entry
          * *input == space, space...                  -> sPATH (implicit, reduce multiple spaces to one)
          * *input == *path, |path| > 0                -> sPATH (implicit)
          * *input == *path, |path| = 0                -> sLINE
          * *input == *path, |path| = 0, *input = '\0' -> sACCEPT
          * *input != *path                            -> sFAIL
          */
         s = statePath (f, &path, &next_char);

         if (s != sLINE) break;
         /* and fall through */
      case sLINE:
         /*
          * entry
          * next_char = *input (for LOOKUP state)
          * a success mark is set on a match which is used in the FETCH state
          */
         s = stateLine (f, fp, &next_char, &success, &index, data, size);

         if (s != sLOOKUP) break;
         /* and fall through */
      case sLOOKUP:
         /*
          * next_char != translate(next_char) && next_char in line && translate(next_char) in line
          *    -> push translated state on stack, continue with untranslated state.
          *
          * next_char in line || translated(next_char) in line          -> sFETCH
          * next_char not in line && translated (next_char) not in line -> sFAIL
          *
          */
         s = stateLookup (f, fp, &next_char, &success, &index, data, size);

         /*
          * stateLookup receives fp by value, so a frame it pushes is not
          * reflected in our fp. Re-read the top frame: it is the pushed
          * frame if there is one and the unchanged current frame otherwise.
          * Without this, sFETCH would overwrite the saved alternative.
          */
         fp = get_top_frame(data);

         if (s != sFETCH) break;
         /* and fall through */
      case sFETCH:
         /*
          * success    -> sACCEPT
          * no success -> fetch new node
          * no success && |path| > 0 && lexmark     -> sPATH + sLEXMARK
          * no success && |path| > 0 && not lexmark -> sPATH
          * no success && |path| = 0 && lexmark     -> sLINE + sLEXMARK
          * no success && |path| = 0 && not lexmark -> sLINE
          */
         s = stateFetch (f, fp, &path, &success, &size, trie, lex_mark, next_char, index);
         break;

      default:
         abs_error ("Unknown state %d in approx_match_trie", s);
         break;
      }
   }
}

#else /* USE_OLD_SEARCH_CODE */

/*
 * Old search code: search the next lexeme in trie, resuming from the frame
 * on top of the stack in data. Each pass of the main loop handles one trie
 * node: match its path part, check for a lexeme that ends here, then pick
 * the branch for the next input character. When a branch fails its frame
 * is popped and the search restarts from the new top frame.
 *
 * On success the lexeme end and info are recorded in data (mark_prefix)
 * and the input position at the end of the matched lexeme is returned.
 */
unsigned char* lxcn_approx_match_trie (Trie trie, TrieData data)
/* returns NULL if there are no (more) matches by any remaining frame */
{ unsigned       offset;
  unsigned char* input;
  unsigned char* lex_end;
  unsigned char  lex_mark;
  TrieFrame*     fp;

try_from_top_frame:
  fp = get_top_frame(data);
  if (fp == NULL) return NULL;
   
  {
	unsigned char last_ch;

	/* frame actually contains lex_last_pos, not lex_end */
	restore_frame(fp, &offset, &input, &lex_end, &last_ch);
#if 0
	abs_message ("approx_match_trie restored frame");
	abs_message ("  lex_end - data->lex_buf = %d, last_ch=0x%02x",
			  lex_end - data->lex_buf,	last_ch		);
#endif
	/* write the character that the frame kept pending; afterwards
	   lex_end points beyond the text collected so far */
	*lex_end++ = last_ch;
  }
  lex_mark = data->lex_mark;

  while (1)
  {
    unsigned char* success;
    unsigned char next_char;
    int index;
    /*
    // Move to next trie node, and read header info
    */
    unsigned char* node = (unsigned char*)trie + offset;
    unsigned path = node[TRIE_PATH_IDX]; /* length of compressed path */
    unsigned size = node[TRIE_SIZE_IDX]; /* number of subtries */
    node += TRIE_HEADER_SZ;
    /*
    // Match input with characters of prefix path in trie node.
    // If lex_mark is not empty, match it first.
    */
    if (path > 0) {
      if (lex_mark != EmptyMark)
      {
        /* the marker is matched against the trie only; no input is consumed */
        if (*node++ != lex_mark)
        {
          pop_frame(data);
          goto try_from_top_frame; /* return NULL; */
        }
	*lex_end++ = lex_mark;
        data->lex_mark = lex_mark = EmptyMark;
        path--;
      }
      while (path--) {
	unsigned char c = *node++;
	next_char = *input;
	if (c == ' ') {
	    /* a space in the trie matches one or more blanks in the input */
	    if (!lxcn_is_blank(next_char)) {
		pop_frame(data);
		goto try_from_top_frame; /* return NULL; */
	    }
	    input++;
	    while (lxcn_is_blank(*input)) {
		input++;
	    }
	} else if (c == next_char) {
	    /* FN: in path, so there can't be a translation as well */
	    input++;
	} else if (c == lxcn_translate(next_char)) {
	    input++;
	} else {
	    pop_frame(data);
	    goto try_from_top_frame; /* return NULL; */
	}
	/* the lexeme text gets the character from the trie, not from the input */
	*lex_end++ = c;
      } /* while (path--) */
    } /* if (path > 0) */
    /*
    // If lex_mark is still not empty, match it first.
    */
    success = NULL;
    if (lex_mark != EmptyMark)
    {
      /* the marker is looked up in the line instead of an input character */
      next_char = lex_mark;
      data->lex_mark = lex_mark = EmptyMark;
    }
    else
    {
      /*
      // If we have a prefix, check terminator. If success,
      // set position and save node info.
      */
      if (lxcn_is_eos(*node))
      { /* the info is read from the first aligned slot behind the line,
           i.e. the slot of the end-of-string choice at line index 0 */
        long info = *(long*)align(node + size);
        mark_prefix(data, info, lex_end);
        success = input;
      }
      /*
      // Get next input character. If end-of-string, return success, if any.
      // Else, if blank, skip blanks, and try to match space.
      */
      next_char = *input++;
      if (lxcn_is_eos(next_char))
      {
        pop_frame(data);
        /* return success; */
	if (success != NULL) {
	    return success;
	} else {
	    goto try_from_top_frame;
	}
      }
      else if (lxcn_is_blank(next_char))
      {
        while (lxcn_is_blank(*input))
          input++;
        next_char = ' ';
      }
    }
    /*
    // Try to match the next input character in the branches.
    */
    index = lxcn_bin_search(next_char, node, size);
    if (next_char != lxcn_translate(next_char)) {
	int tndex = lxcn_bin_search(lxcn_translate(next_char), node, size);
	if (index < 0 && tndex < 0) {
	    /* neither path available */
	    pop_frame(data);
	    /* return success; */
	    if (success != NULL) {
		return success;
	    } else {
		goto try_from_top_frame;
	    }
	} else if (index < 0) {
	    /* only translated path available */
	    next_char = lxcn_translate(next_char);
	    index = tndex;
	} else if (tndex >= 0) {
	    /* both paths available */
	    /* fork: the current top frame keeps the translated alternative
	       and a fresh frame is pushed for the literal branch, which is
	       the one that is followed now */
	    unsigned tffset = *(long*)align(node + size + WORD_SZ * tndex);
	    save_frame(fp, tffset, input, lex_end, lxcn_translate(next_char));
	    fp = push_frame(data);
	} else {
	    /* only untranslated path available */
	    /* no action needed */
	}
    } else if (index < 0) {
	pop_frame(data);
	/* return success; */
	if (success != NULL) {
	    return success;
	} else {
	    goto try_from_top_frame;
	}
    }
    /*Ftrans *lex_end++ = next_char;	** moved below */
    /*
    // Fetch offset of next trie node. If we have found
    // an info, remember the current offset and input position,
    // and return the last prefix.
    */
    offset = *(long*)align(node + size + WORD_SZ * index);
    if (success != NULL)
    {
      /* next_char is not written yet: it stays pending in the frame and is
         stored when the frame is resumed by a later call */
      save_frame(fp, offset, input, lex_end, next_char);
      return success;
    }
    *lex_end++ = next_char;	/* Ftrans moved here */
  }
}

#endif /* !USE_OLD_SEARCH_CODE */

