// Implementation of the LIFParser class methods.
//
// Copyright 2001, KUN
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Library General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

// $Id: lifparser.cc,v 1.16 2003/09/27 00:46:42 murphy Exp $

using namespace std;
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <string.h> // for strcmp(), strlen(), memset()
#include <string>

#include <lexicon.h>
#include "globals.h"
#include "globlexafflists.h"
#include "globtables.h"
#include "lexnontermlist.h"
#include "lifparser.h"

#ifdef DEBUG_LIF
#define DB(x) x
#else
#define DB(x)
#endif

/*
**------------------------------------------------------------------------------
** .lif file parsing
**------------------------------------------------------------------------------
*/

// Convert one hexadecimal digit character to its numeric value.
//
// c: the character to convert ('0'-'9', 'A'-'F' or 'a'-'f').
// Returns the digit value 0..15, or -1 when c is not a hexadecimal digit.
int
hex_to_int(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'A' && c <= 'F') {
        return 10 + (c - 'A');
    }
    if (c >= 'a' && c <= 'f') {
        return 10 + (c - 'a');
    }

    // Anything else is not a hex digit.
    return -1;
}

// Interpret the leading hexadecimal digits of txt as a bitset value.
//
// txt: text starting with zero or more hexadecimal digits; conversion stops
//      at the first character that is not a hexadecimal digit (or at the
//      end of the string).
// Returns the accumulated value, or 0 when txt does not start with a
// hexadecimal digit.
SET
convert_txt_to_bitset(string txt)
{
    SET bitset = 0;

    // The <ctype.h> classification functions require a value representable
    // as unsigned char (or EOF); a plain char may be negative, so cast first.
    for (const char* t = txt.c_str();
         isxdigit(static_cast<unsigned char>(*t));
         ++t) {
        bitset = bitset * 16 + hex_to_int(*t);
    }

    return bitset;
}

// Read the .lif file lif_name and register its contents.
//
// The file is consumed as three consecutive sections, each ended by a
// separator line:
//   1. lexicon module names, passed to add_to_lex_modules();
//   2. the affix table (name, index, hexadecimal bitset, flag), stored in
//      idtable and lex_affixname_list;
//   3. the syntax nonterminal table (name, arity, index, parameters), stored
//      in idtable and lex_nonterm_List.
//
// lif_name:         path of the .lif file to read.
// lex_nonterm_List: list that receives the nonterminals of section 3.
// Returns false when the file is reported unreadable, true otherwise.
bool
LifParser::parse_lif(string lif_name, class LexNontermList *lex_nonterm_List)
{
    lif = new LIFFile(lif_name);
    if (!lif || lif->unreadable_file()) {
        // NOTE(review): the LIFFile allocated above is not released on this
        // path. Before adding a delete here, confirm that ~LIFFile copes with
        // a file that was never opened.
        return false;
    }

    // Section 1 -- lexicon module list: one module name per entry.
    while (!lif->is_seperator_line()) {
        string module_name = lif->get_text();
        add_to_lex_modules(module_name);
    }

    // Section 2 -- affix table. An entry flagged "lhs" is an affix
    // nonterminal; every other entry is recorded together with the name of
    // the most recent "lhs" entry.
    string current_lhs_name;
    while (!lif->is_seperator_line()) {
        string affix_name = lif->get_text();
        idtable.add(affix_name);
        unsigned affix_idx = lif->get_number();
        string bits_text = lif->get_text();
        SET affix_bits = convert_txt_to_bitset(bits_text);
        string kind = lif->get_text();
        DB(cerr << affix_name << "\tidx\t" << dec(affix_bits) << "\t" << kind << endl);

        if (kind != "lhs") {
            // Terminal affix: attach it to the last nonterminal seen.
            lex_affixname_list.add_affix(affix_name, affix_idx, affix_bits, current_lhs_name);
        } else {
            current_lhs_name = affix_name;
            lex_affixname_list.add_affix(affix_name, affix_idx, affix_bits, "");
            lex_affixname_list.mark_as_nonterminal(affix_name);
        }
    }

    // Section 3 -- syntax nonterminal table: name, arity, index, followed by
    // `arity` parameter fields.
    while (!lif->is_seperator_line()) {
        string nonterm_name = lif->get_text();
        unsigned param_count = lif->get_number();
        unsigned nonterm_idx = lif->get_number();

        idtable.add(nonterm_name);
        pParam param_list = 0;
        read_parameters(param_count, param_list);
        lex_nonterm_List->add_nonterm(nonterm_name, param_list, nonterm_idx);
    }

    delete lif;
    return true;
}

// Read to_go parameter fields from the .lif file and build their
// parameter list.
//
// Each field is "TEXT", "INT", or an identifier name (which is added to
// idtable). The fields are read front to back; the list is then built back
// to front, so every parameter is entered into param_table together with
// the list made from the parameters that follow it.
//
// to_go:  number of parameter fields still to read; a value <= 0 reads
//         nothing and leaves params untouched.
// params: in/out -- on entry the tail to append to, on return the list for
//         all parameters read.
void
LifParser::read_parameters(int to_go, pParam& params)
{
    // Stop on zero AND on negative counts: a corrupt arity in the file must
    // not cause unbounded recursion.
    if (to_go <= 0) return;

    string param_str = lif->get_text();
    Param param;
    if (param_str == "TEXT") {
        param = Param(AnyText);
    } else if (param_str == "INT") {
        param = Param(AnyInt);
    } else {
        ID id = idtable.add(param_str);
        param = Param(IdType, id);
    }

    // Build the list for the remaining parameters first, then put this one
    // in front of it.
    read_parameters(to_go - 1, params);
    params = param_table.enter(param, params);
}

// Report whether the file lif_name cannot be stat()ed.
//
// lif_name: path of the .lif file to probe.
// Returns true when stat() fails for that path, false when it succeeds.
bool LifParser::no_lif_file(string lif_name)
{
    struct stat file_info;
    const int stat_result = stat(lif_name.c_str(), &file_info);

    if (stat_result == 0) {
        return false;
    }
    return true;
}


//------------------------------------------------------------------------------
//
// LIFFile stuff:
//
//------------------------------------------------------------------------------

// Open the .lif file lif_name for reading and load its first line.
//
// A 1024-byte line buffer is allocated in every case. When the file cannot
// be opened, lif_file stays NULL and the buffer holds an empty string.
//
// lif_name: path of the file to open.
LIFFile::LIFFile(string lif_name)
{
    liffilebuflen = 1024;
    // Value-initialise the buffer (all zero bytes) so that the text
    // accessors see an empty line, not indeterminate memory, if the file
    // could not be opened and no line was ever read.
    liffilebuf = new char[liffilebuflen]();
    lifbufptr = liffilebuf;

    lif_file = fopen(lif_name.c_str(), "r");
    if (lif_file) {
        read_next_line();
    }
}

// Close the file (if it was opened) and release the line buffer.
LIFFile::~LIFFile()
{
    // fopen() may have failed in the constructor; fclose(NULL) is undefined.
    if (lif_file != NULL) {
        fclose(lif_file);
    }
    // The buffer was allocated with new[], so it must be released with
    // delete[].
    delete[] liffilebuf;
}

unsigned
LIFFile::get_number()
{
    string res_str = get_text();
    return atoi(res_str.c_str());
}

// Read the next field of the current line.
//
// Collects characters from the current buffer position up to (not
// including) the next column separator or the end of the line, then skips
// the separator via skip_column_seperator().
//
// Returns the collected field text (possibly empty).
string
LIFFile::get_text()
{
    string field;

    for (; !(is_column_seperator() || end_of_line()); ++lifbufptr) {
        field += *lifbufptr;
    }

    skip_column_seperator();

    return field;
}

// Test whether the buffered line is the section separator "*****".
//
// When it is, the separator is consumed by loading the next line.
// Returns true if the buffered line was a separator, false otherwise (in
// which case the buffer is left untouched).
bool
LIFFile::is_seperator_line()
{
    const bool at_separator = (strcmp(liffilebuf, "*****") == 0);
    if (!at_separator) {
        return false;
    }

    read_next_line();
    return true;
}

// Test whether the character at the current buffer position is the column
// separator (a tab).
bool
LIFFile::is_column_seperator()
{
    const char current = *lifbufptr;
    return current == '\t';
}

void
LIFFile::skip_column_seperator()
{
    if (is_column_seperator()) {
        lifbufptr++;
    }
    end_of_line_check();
}

// Test whether the current buffer position is at the end of the line,
// i.e. at the terminating '\0'.
bool
LIFFile::end_of_line()
{
    // Equivalent to strlen(lifbufptr) == 0, but constant time: this is
    // called once per character while a field is being read.
    return *lifbufptr == '\0';
}

// Load the next line from the file when the current one is exhausted;
// do nothing otherwise.
void
LIFFile::end_of_line_check()
{
    if (!end_of_line()) {
        return;
    }
    read_next_line();
}

// Load the next line of the file into the line buffer and reset the read
// position to the start of the buffer.
//
// Reading stops at a newline (which is consumed but not stored), at end of
// file, or when the buffer is full. A line longer than the buffer is split:
// the part that did not fit is returned by the following call(s). At end of
// file, or when no file is open, the buffer holds an empty string.
void
LIFFile::read_next_line()
{
    DB(cerr << "reading next line from liffile" << endl);
    memset(liffilebuf, 0, liffilebuflen);

    lifbufptr = liffilebuf;
    // The final byte of the buffer is reserved for the terminating '\0'.
    char* const terminator_slot = liffilebuf + liffilebuflen - 1;

    if (lif_file != NULL) {
        while (lifbufptr != terminator_slot) {
            const int in_c = getc(lif_file);
            if (in_c == EOF || in_c == '\n') {
                break;
            }
            // Store only real characters, so that nothing already read is
            // overwritten by the terminator when the buffer fills up.
            *lifbufptr++ = static_cast<char>(in_c);
        }
    }
    *lifbufptr = '\0';

    lifbufptr = liffilebuf;
    DB(cerr << "buffer holds ->" << liffilebuf << "<-" << endl);
}
