%{
// Scanner module for usage with the bison generated parser.
//
// Copyright 2001, KUN.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Library General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

// $Id: scanner.l,v 1.13 2005/08/24 12:18:54 olafs Exp $

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif // HAVE_CONFIG_H

#include <stdlib.h>

using namespace std;
#include <string.h>

#include "parserinterface.h"
#include "parser.h"
#include "abase_lex_input.h"

// Converts a scanned, quoted STRING token into a separately allocated lexeme;
// defined in the user code section at the end of this file.
char* str_input_cpy(char* src, int len);
// Running column counter: advanced by yyleng in the scanner actions, set to 1
// after a newline and by reset_scanner_counters(), read via get_col_nr().
static int col_nr = 0;

// NOTE(review): flex configuration macros -- presumably YY_NO_UNPUT leaves out
// the unused yyunput() helper and YY_NEVER_INTERACTIVE makes the scanner treat
// its input as non-interactive; confirm against the flex version in use.
#define YY_NO_UNPUT		1
#define YY_NEVER_INTERACTIVE	1

%}

/* Let flex maintain the current input line number in yylineno
   (reported by get_line_nr()). */
%option yylineno
/* No yywrap() is needed: end of input simply ends the scan. */
%option noyywrap

/* Horizontal layout: blank, tab, carriage return (\r and \015 denote the
   same character) and \032 (Ctrl-Z).  Newline is not layout; it has its
   own rule. */
LAYOUT      [ \t\r\015\032]+
/* A comment runs from '#' up to, but not including, the end of the line. */
COMMENT     #.*
/* Decimal number without superfluous leading zeros. */
NUMBER      0|[1-9][0-9]*
DIGIT	    [0-9]
/* Contents of a string: a backslash followed by any character except
   newline, or any character from blank up to \377 other than the double
   quote and the backslash.  Tab is not accepted. */
STR_CHARS   ((\\.)|[ !#-\[\]-~\177-\377])+
/* A non-empty string between double quotes. */
STRING      \"{STR_CHARS}\"
/* Punctuation characters allowed inside, but not at the start of, an
   identifier part. */
ORNAMENT    [$+\-?@^~_]
/* ASCII letters plus every character from \177 up to \377. */
LETTER      [a-zA-Z\177-\377]
LETMENT	    ({LETTER}|{DIGIT}|{ORNAMENT})
/* Identifier part: a letter followed by letters, digits and ornaments. */
IDPART      {LETTER}{LETMENT}*


%%

    /* Token rules.  Every action except the newline rule adds the length of
       the matched text (yyleng) to col_nr; the newline rule sets col_nr
       back to 1.  Comments and layout are consumed without returning a
       token.  Where several rules match, flex prefers the longest match, so
       "::" wins over ":" and a period directly followed by a number wins
       over a lone period. */

\n              { col_nr = 1; return NEWLINE; }
{COMMENT}   	{ col_nr += yyleng; }
{LAYOUT}    	{ col_nr += yyleng; }
{NUMBER}    	{
                    /* NOTE(review): atoi() cannot report overflow for very
                       long digit strings -- confirm the expected range of
                       yylval.number. */
                    col_nr += yyleng;
                    yylval.number = atoi(yytext);
                    return NUMBER;
                }
{STRING}        {
                    /* The semantic value is the converted copy returned by
                       str_input_cpy(), not yytext itself. */
                    col_nr += yyleng;
                    yylval.chars = str_input_cpy(yytext, yyleng);
                    return STRING;
                }
{IDPART}	{
                    /* The string is allocated with new here; NOTE(review):
                       presumably the parser takes ownership and deletes
                       it -- confirm. */
                    col_nr += yyleng;
                    yylval.string_ptr = new string(yytext);
                    return IDPART;
                }
::          	{ col_nr += yyleng; return DOUBLECOLON; }
:           	{ col_nr += yyleng; return COLON; }
\.{NUMBER}      { /* NOTE(review): no semantic value is stored for the digits -- confirm the parser does not need it. */ col_nr += yyleng; return PERIODNUMBER; }
\.          	{ col_nr += yyleng; return PERIOD; }
,           	{ col_nr += yyleng; return COMMA; }
;           	{ col_nr += yyleng; return SEMICOLON; }
\(          	{ col_nr += yyleng; return OPENPAR; }
\)          	{ col_nr += yyleng; return CLOSEPAR; }
\|          	{ col_nr += yyleng; return VBAR; }
&               { col_nr += yyleng; return AMPERSAND; }
\/              { col_nr += yyleng; return SLASH; }
\{              { col_nr += yyleng; return OPENGUARD; }
\}              { col_nr += yyleng; return CLOSEGUARD; }
\[              { col_nr += yyleng; return OPENOPT; }
\]              { col_nr += yyleng; return CLOSEOPT; }
.           	{ /* Any other single character is returned as its own character value. */ col_nr += yyleng; return yytext[0]; }

%%

// Report the scanner's current input line number.
int get_line_nr()
{
    // yylineno is the line counter kept by the generated scanner.
    const int current_line = yylineno;

    return current_line;
}

// Report the column counter accumulated by the scanner actions.
int get_col_nr()
{
    const int current_column = col_nr;

    return current_column;
}

/*------------------------------------------------------------------------------
// Function:
//	char* lexeme_copy(char* dst, char* src, int len)
//
// Description:
//	Copy quoted string src with length len to dst.
//	Reduce multiple spaces or tabs to one space.
//	Strip quotes, and strip hyphens from prefixes, suffixes and infixes.
//	Replace \- with -, \\ with \, \n with newline, \t with tab, \" with ";
//	any other escaped character is copied without its backslash.
//	Insert control codes before prefixes, infixes and suffixes. A word form
//	that still contains layout after stripping gets the multi token code
//	instead and keeps its hyphens.
//	A word form consisting of only hyphens and layout yields "--", "-"
//	or " ".
//
// Return value:
//	Pointer dst.
//
// Side Effects:
//	Contents of dst are overwritten.
//	Contents of src are modified: the closing quote, a trailing hyphen and
//	trailing layout are overwritten with '\0'.
//
// Memory management:
//	None.
//
// Note:
//	String src should not be preceded or followed by layout.
//	String src should start and end with a quote, so len is at least 2.
//	String dst should be at least len + 1 bytes long.
//
//	Adaptions to lexeme_copy() should also be applied to other components
//	of lexicon system (lexicon, agfl, and rts).
//----------------------------------------------------------------------------*/
static char *lexeme_copy(char* dst, char* src, int len)
{
  int	layout;
  int	prefix = 0;
  int	suffix = 0;
  int	multi_token;
  char	c;
  char* d = dst;

	// strip quotes
  src[len - 1] = '\0';
  src += 1;
  len -= 2;

	// strip leading and trailing layout; the len guard stops the backward
	// scan once nothing of the word form is left
  while ((c = *src), (c == ' ') || (c == '\t'))
    { src++; len--; }
  while ((len > 0) && ((c = src[len - 1]), (c == ' ') || (c == '\t')))
    src[--len] = '\0';

	// strip prefix and suffix marks: a leading hyphen makes the word form
	// a suffix, a trailing hyphen without a backslash directly in front
	// of it makes the word form a prefix
  if (*src == '-')
  {
    suffix = 1;
    src++; len--;
  }
  if ((len > 0) && (src[len - 1] == '-')
      && !((len > 1) && (src[len - 2] == '\\')))
  {
    prefix = 1;
    src[--len] = '\0';
  }

  /* strip layout that stood between the hyphens and the word form */
  while ((c = *src), (c == ' ') || (c == '\t'))
    { src++; len--; }
  while ((len > 0) && ((c = src[len - 1]), (c == ' ') || (c == '\t')))
    src[--len] = '\0';

  /* maybe there were only hyphens and layout in wordform */
  if (!*src)
  {
    if (prefix && suffix)
      strcpy(dst, "--");
    else if (prefix || suffix)
      strcpy(dst, "-");
    else
      strcpy(dst, " ");
    return dst;
  }

  /* check for multi token: only the stripped word form is inspected, so
     layout that was skipped above cannot turn a single word into a multi
     token */
  multi_token = (strpbrk(src, " \t") != NULL);

  /* mark word form with type */
  if (multi_token)
  {
    *d++ = abs_MultiTokenMark;
    if (suffix)
      *d++ = '-';
  }
  else if (prefix && suffix)
    *d++ = abs_InfixMark;
  else if (prefix)
    *d++ = abs_PrefixMark;
  else if (suffix)
    *d++ = abs_SuffixMark;

  /* copy word form; layout starts at 1 so that no space can be emitted
     before the first character */
  layout = 1;
  while ((c = *src++))
  {
    switch(c)
    {
      case ' ':
      case '\t':
        /* a run of layout collapses into one space */
        if (!layout)
        {
          *d++ = ' ';
          layout = 1;
        }
        break;
      case '\\':
        c = *src++;
        if (c == '\0')
        {
          /* backslash at the very end (what it escaped was stripped):
             step back so the loop stops at the terminator instead of
             copying it and reading on behind it */
          src--;
          break;
        }
        switch (c)
        {
          case 'n':
            *d++ = '\n';
            break;
          case 't':
            *d++ = '\t';
            break;
          default:
            /* \-, \" and \\ as well as any other escaped character */
            *d++ = c;
        }
        layout = 0;
        break;
      default:
        *d++ = c;
        layout = 0;
    }
  }
  /* a multi token keeps its prefix hyphen at the end */
  if (multi_token && prefix)
    *d++ = '-';
  *d = '\0';
  return dst;
}

// Convert the quoted string src (len characters, quotes included) into a
// newly allocated lexeme by running lexeme_copy() on a private copy, so
// that src itself is left untouched.
//
// Returns the new lexeme, which the caller has to release with free(), or
// NULL when src is NULL, when len is smaller than 2 (no room for the two
// quotes), when src holds fewer than len characters, or when memory is
// exhausted.
char* str_input_cpy(char* src, int len)
{
    char* scratch;
    char* dst;

    // lexeme_copy() writes to position len - 1 of its input, so len must
    // lie within the string that is going to be duplicated.
    if ((src == NULL) || (len < 2) || (strlen(src) < (size_t) len))
        return NULL;

    scratch = strdup(src);
    if (scratch == NULL)
        return NULL;

    // lexeme_copy() asks for at least len + 1 bytes; calloc() hands them
    // out zero-filled.
    dst = (char*) calloc((size_t) len + 1, 1);
    if (dst != NULL)
        lexeme_copy(dst, scratch, len);
    free(scratch);

    return dst;
}

// Put the position counters back at line 1, column 1.
void reset_scanner_counters()
{
    col_nr = 1;
    yylineno = 1;
}

