// Debugging utility for dumping the .lex file.
//
// Copyright 2001, KUN.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Library General Public License for more details.
//
// You should have received a copy of the GNU Library General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

// $Id: dumptrie.cc,v 1.10 2005/08/18 11:50:01 olafs Exp $

using namespace std;
#include <ctype.h>
#include <sys/types.h>
#include <unistd.h>
#include <string>
#include <vector>
#include <iostream>
#include <fstream.h>
#include <iomanip.h>

//------------------------------------------------------------------------------
// class Trie
//------------------------------------------------------------------------------

// In-memory image of the trie section of a .lex file.
//
// The object owns the byte buffer `trie` (allocated with new[] in read() and
// released with delete[] in the destructor).  Because of that ownership the
// class is not copyable: a memberwise copy would make two objects delete[]
// the same buffer.
class Trie {
    unsigned char* trie;    // raw trie bytes; 0 until read() allocates them
    unsigned long size;     // number of bytes stored in `trie`

    // Recursive worker behind print(ostream&): walks one trie node.
    void print(unsigned char*, unsigned char*, unsigned, ostream&);

    // Copying is disabled (declared private, intentionally left undefined).
    Trie(const Trie&);
    Trie& operator=(const Trie&);

public:
    Trie();             // empty trie: no buffer, size 0
    Trie(istream&);     // construct and load from the stream via read()
    ~Trie();            // releases the buffer

    void read(istream&);    // load the size header and the trie bytes
    void dump(ostream&);    // one line per byte: index, then char or number
    void print(ostream&);   // print every word stored in the trie
};

// Default constructor: an empty trie with no buffer attached.
inline
Trie::Trie() : trie(0), size(0) {
}

// Construct a trie and immediately load it from `is`.
//
// The members are zeroed before read() runs so that they never hold
// indeterminate values: if the stream cannot deliver the size header,
// `size` stays 0 instead of being whatever happened to be in memory.
inline
Trie::Trie(istream& is) : trie(0), size(0) {
    read(is);
}

// Destructor: frees the byte buffer.  delete[] on a null pointer is a no-op,
// so an empty trie needs no special case.
inline
Trie::~Trie() {
    delete[] trie;
}

// Stream-insertion wrapper: forwards to trie.print(os) and returns the
// stream so that insertions can be chained.
inline ostream& operator<<(ostream& os, Trie& trie)
{
    trie.print(os);
    return os;
}

// Stream-extraction wrapper: forwards to trie.read(is) and returns the
// stream so that extractions can be chained.
inline istream& operator>>(istream& is, Trie& trie)
{
    trie.read(is);
    return is;
}

void
Trie::read(istream& is) {
    is.read((char *)(&size), static_cast<int>(sizeof(unsigned long)));
    cout << "size = " << size << endl;
    trie = new unsigned char[size]; 
    is.read((char *)(trie), static_cast<int>(size));
}

// Raw dump of the trie buffer: one line per byte, holding the byte's index,
// a tab, and then either the byte as a character (when isalpha() accepts it)
// or its numeric value.
void
Trie::dump(ostream& os) {
    for (unsigned pos = 0; pos < size; ++pos) {
        const unsigned char byte = trie[pos];

        os << pos << '\t';
        if (!isalpha(byte)) {
            os << static_cast<unsigned>(byte);
        } else {
            os << byte;
        }
        os << endl;
    }
}

// Print every word stored in the trie to `os`.  Does nothing when no buffer
// is attached.  The recursive overload builds each word in a 1024-byte
// scratch buffer, starting at the node at offset 0.
void
Trie::print(ostream& os) {
    if (!trie) {
        return;
    }

    unsigned char scratch[1024];
    print(scratch, scratch, 0, os);
}

// Round `v` up to the next multiple of `b` (default 4).  A value that is
// already a multiple of `b` is returned unchanged.  `b` must be non-zero.
inline unsigned
Align(unsigned v, unsigned b = 4) {
    const unsigned remainder = v % b;
    if (remainder == 0) {
        return v;
    }
    return v + b - remainder;
}

// Recursive worker for print(ostream&): prints every word reachable from the
// trie node that starts at byte `offset`.
//
//   word   - start of the scratch buffer in which the current word is built
//   p      - position in that buffer where this node appends its bytes
//   offset - byte offset of the node inside `trie`
//   os     - output stream
//
// Node layout as consumed below:
//   1 byte   number of path bytes
//   1 byte   number of branch entries
//   N bytes  path bytes, appended to the word as-is
//   M bytes  one key byte per branch entry
//   padding  up to the next multiple of 4 (see Align)
//   M * 4    one `unsigned` value per branch entry
//
// NOTE(review): neither the scratch buffer nor the trie buffer is
// bounds-checked here, so a malformed file can run past either one.
void Trie::print(unsigned char* word, unsigned char* p, unsigned offset, ostream& os)
{
  unsigned char path = trie[offset++];
  // Local branch count; shadows the member Trie::size within this function.
  unsigned char size = trie[offset++];

  // Append this node's path bytes to the word being built.
  for (int i = 0; i < path; i++)
    *p++ = trie[offset++];

  // The per-branch values start at the first 4-byte boundary after the keys.
  unsigned info_offset = Align(offset + size);
  for (int i = 0; i < size; i++)
  {
    // NOTE(review): reads an `unsigned` straight out of the byte buffer;
    // presumably relies on `unsigned` being 4 bytes (matching the += 4 stride
    // below) and on the address being suitably aligned -- confirm.
    unsigned info = *(unsigned*)(trie + info_offset);
    // Store the branch key at the end of the word.  A non-zero key continues
    // the word, and `info` is then used as the offset of the child node.
    if ((*p = trie[offset++]))
      print(word, p + 1, info, os);
    else
      // A zero key has just NUL-terminated the word: print the value, the
      // offset it was read from, and the quoted word.
      os << info << '\t' << info_offset << "\t\"" << word << '"' << endl;
    info_offset += 4;
  }
}

//------------------------------------------------------------------------------
// The .lex file being dumped.  Opened in main(); every lex_*() helper below
// reads its next value from the current position of this stream.
ifstream lexfile;

// Read sizeof(long) raw bytes from lexfile and return them as a long.
// Returns 0 when the read fails (e.g. at end of file); the value is
// zero-initialised so that a failed read never yields an indeterminate result.
long
lex_set() {
    long value = 0;

    lexfile.read(reinterpret_cast<char*>(&value), sizeof value);

    return value;
}

// Read a single byte from lexfile and return it converted from char to long
// (so bytes >= 0x80 come back negative where plain char is signed).
// Returns 0 when the read fails; the byte is zero-initialised so that callers
// scanning for a terminating 0 stop at end of file instead of acting on an
// indeterminate value.
long
lex_byte() {
    char byte = 0;

    lexfile.read(&byte, 1);

    return byte;
}

// Read two bytes from lexfile and combine them into a 16-bit value, first
// byte most significant.  The result is always in the range 0..65535.
//
// Both bytes are handled as unsigned char: with plain (possibly signed) char
// a low byte >= 0x80 would sign-extend and overwrite the high byte, and a
// high byte >= 0x80 would left-shift a negative value.
long
lex_word() {
    const unsigned char high = static_cast<unsigned char>(lex_byte());
    const unsigned char low = static_cast<unsigned char>(lex_byte());

    return (static_cast<long>(high) << 8) | static_cast<long>(low);
}

// Read sizeof(long) raw bytes from lexfile into an unsigned long and return
// the value converted to long.  Returns 0 when the read fails; the value is
// zero-initialised so that a failed read never yields an indeterminate result.
long
lex_codes() {
    unsigned long codes = 0;

    lexfile.read(reinterpret_cast<char*>(&codes), sizeof(long));

    return codes;
}

string
lex_string() {
    string s = "";
    char c;

    while((c = lex_byte()) != 0) {
        s += c;
    }

    return s;
}

// Read sizeof(size_t) raw bytes from lexfile and return them as a size_t.
// Returns 0 when the read fails, so callers that use the result as an element
// count process nothing rather than looping over an indeterminate count.
size_t
lex_size() {
    size_t count = 0;

    lexfile.read(reinterpret_cast<char*>(&count), sizeof count);

    return count;
}

// Read sizeof(off_t) raw bytes from lexfile and return them as an off_t.
// Returns 0 when the read fails; the value is zero-initialised so that a
// failed read never yields an indeterminate result.
off_t
lex_index() {
    off_t index = 0;

    lexfile.read(reinterpret_cast<char*>(&index), sizeof index);

    return index;
}

// Read one byte from lexfile via lex_byte(); the result is true exactly when
// that byte is the character 'T'.
bool
lex_bool() {
    const char flag = static_cast<char>(lex_byte());
    return flag == 'T';
}

//------------------------------------------------------------------------------

// Tag values for the kind of a nonterminal parameter.  One tag byte per
// parameter is read from the file in read_nonterminals(), kept in
// EntryData::par_info, and switched on again in read_entries().
enum LexParamTypes {
    LexParamLattice = 1,    // value is used as an index into affix_list
    LexParamText = 2,       // shown as <text> / <text:idx>
    LexParamInt = 3         // shown as <int> / <int:idx>
};

// Description of one nonterminal as read by read_nonterminals().
typedef struct {
    string name;        // nonterminal name, or "<empty>" for a nameless slot
    unsigned arity;     // number of parameters
    // One LexParamTypes tag per parameter; allocated with new[] in
    // read_nonterminals() (0 for the "<empty>" slot).  The struct has no
    // destructor, so copies share this pointer and it is never freed.
    char* par_info;
} EntryData;
// All nonterminals in file order; read_entries() indexes this by the
// nonterminal number stored with each entry.
vector<EntryData> nont_arity;

// Read `aff_size` indices from lexfile and print them on one line as
// "<i> | <j> | ...", followed by a newline.  With aff_size == 0 only the
// newline is printed.
void
read_set_affix_lhsses(unsigned aff_size)
{
    for (unsigned remaining = aff_size; remaining > 0; --remaining) {
        const unsigned lhs_idx = lex_index();
        cout << "<" << lhs_idx << ">";
        // Separator between items, but not after the last one.
        if (remaining > 1) {
            cout << " | ";
        }
    }
    cout << endl;
}

// Names of the set affixes in file order, filled by read_set_affixes().
// Lattice parameters refer to entries of this list by index.
vector<string> affix_list;

// Read and print the set-affix section: a count, then per affix its name,
// its set value, a terminal/nonterminal flag and the list of LHSses it
// belongs to.  Every name is also appended to affix_list.
void
read_set_affixes() {
    const unsigned total = lex_size();
    cout << "There are " << total << " set affixes:" << endl;

    unsigned n = 0;
    while (n < total) {
        const string affix_name = lex_string();
        const long set_value = lex_set();
        cout << "affix " << n << ": \"" << affix_name << "\" (" << set_value << ")";
        affix_list.push_back(affix_name);

        // The flag precedes the LHS count in the file; keep this read order.
        const bool nonterminal = lex_bool();
        const unsigned lhs_count = lex_size();

        cout << (nonterminal ? ", nonterminal belonging to "
                             : ", terminal belonging to ");
        cout << lhs_count << " LHSses: ";
        read_set_affix_lhsses(lhs_count);
        ++n;
    }
}

// Read and print the nonterminal section, recording each nonterminal in
// nont_arity for later use by read_entries().
//
// Per nonterminal the file holds a 0-terminated name; a nameless slot has
// nothing else.  A named one is followed by a 2-byte arity and one tag byte
// per parameter (see LexParamTypes); a lattice parameter is additionally
// followed by an index into affix_list.
//
// An affix index that does not fit affix_list is reported as
// "<out of range>" instead of being used to index the vector.
void
read_nonterminals() {
    unsigned nr_nonterminals = lex_size();
    cout << "There are " << nr_nonterminals << " nonterminals:" << endl;
    for (unsigned i = 0; i < nr_nonterminals; i++) {
        string name = lex_string();
        EntryData entry_data;

        if (name == "") {
            cout << "nonterminal " << i << ": <empty>" << endl;
            entry_data.name = "<empty>";
            entry_data.arity = 0;
            entry_data.par_info = 0;
        } else {
            unsigned arity = lex_word();

            entry_data.name = name;
            entry_data.arity = arity;
            // Kept for the lifetime of the program; read_entries() reads it
            // back through nont_arity.
            entry_data.par_info = new char[arity];

            cout << "nonterminal " << i << ": \"" << name << "\" (" << arity << ")" << endl;
            for (unsigned j = 0; j < arity; j++) {
                entry_data.par_info[j] = lex_byte();

                switch(entry_data.par_info[j]) {
                    case LexParamLattice: {
                        const unsigned aff_idx = lex_index();
                        cout << " <lattice:" << aff_idx << " (";
                        // The index comes from the file: validate before use.
                        if (aff_idx < affix_list.size()) {
                            cout << affix_list[aff_idx];
                        } else {
                            cout << "<out of range>";
                        }
                        cout << ")>";
                        break;
                    }
                    case LexParamText:
                        cout << " <text>";
                        break;
                    case LexParamInt:
                        cout << " <int>";
                        break;
                    default:
                        cout << " <unknown>";
                }
            }
            cout << endl;
        }
        nont_arity.push_back(entry_data);
    }
}

void
read_entries() {
    long nr_entries = lex_size();

    cout << "-- There are " << nr_entries << " entries." << endl;
    for (int i = 0; i < nr_entries; i++) {
        cout << "Entry " << i << endl;

        long nont_nr = lex_index();
        EntryData entry_data = nont_arity[nont_nr];
        cout << "\tNonterminal nr: " << nont_nr << " (" << entry_data.name << ")" << endl;

        unsigned arity = entry_data.arity;
        char *par_info = entry_data.par_info;

        cout << "\tFrequency " << lex_set() << endl;

        cout << "\tParameters:";
        for (unsigned par_nr = 0; par_nr < arity; ++par_nr) {
            unsigned idx = lex_index();
            switch(par_info[par_nr]) {
                case LexParamLattice:
                    cout << " <lattice:" << idx << " (" << affix_list[idx]
                         << ")>";
                    break;
                case LexParamText:
                    cout << " <text:" << idx << ">";
                    break;
                case LexParamInt:
                    cout << " <int:" << idx << ">";
                    break;
                default:
                    cout << " <unknown:" << idx << ">";
            }
        }
        cout << "." << endl;
    }
}

// Read and print the entry-pair section: a count followed by that many
// pairs of indices, one pair per output line.
void
read_entry_pairs() {
    const unsigned count = lex_size();
    cout << "---- read_entry_pairs: " << count << " pairs" << endl;

    unsigned n = 0;
    while (n < count) {
        const long first = lex_index();
        const long second = lex_index();
        cout << "\t" << n << ": <" << first << ", " << second << ">" << endl;
        ++n;
    }

    cout << "----" << endl;
}

// Read and print the integer-affix section: a count followed by that many
// values, each printed on its own line next to its position.
void
read_int_affixes() {
    const unsigned count = lex_size();
    cout << "---- read_int_affixes: " << count << " affixes" << endl;

    unsigned n = 0;
    while (n < count) {
        const long value = lex_set();
        cout << "\t" << n << ": " << value << endl;
        ++n;
    }

    cout << "----" << endl;
}

// Read and print the text-affix section: a count followed by that many
// 0-terminated strings, each printed on its own tab-indented line.
void
read_text_affixes() {
    const unsigned count = lex_size();
    cout << "---- read_text_affixes: " << count << " affixes" << endl;

    unsigned n = 0;
    while (n < count) {
        const string text = lex_string();
        cout << "\t" << text << endl;
        ++n;
    }

    cout << "----" << endl;
}

// Dump every section that follows the trie.  The sections are consumed from
// lexfile sequentially, so the order of this table is the order in which
// they are read; the nonterminal and entry readers additionally rely on the
// lists filled by the readers listed before them.
void
dump_entries() {
    typedef void (*SectionReader)();
    static const SectionReader sections[] = {
        read_int_affixes,
        read_text_affixes,
        read_set_affixes,
        read_nonterminals,
        read_entries,
        read_entry_pairs
    };
    const unsigned section_count = sizeof sections / sizeof sections[0];

    for (unsigned k = 0; k < section_count; ++k) {
        sections[k]();
    }
}

//------------------------------------------------------------------------------
// Entry point: dumptrie <file>
//
// Opens the given .lex file in binary mode, prints the trie stored at its
// start and then the sections that follow it.  Returns 1 when the argument
// count is wrong or the file cannot be opened, 0 otherwise.
//
// Failure paths return from main instead of calling exit(), which no header
// included by this file is guaranteed to declare.
int main(int argc, char* argv[])
{
    if (argc != 2) {
        cout << "Usage: dumptrie <file>" << endl;
        return 1;
    }

    lexfile.open(argv[1], ios::in|ios::binary);
    if (!lexfile) {
        cout << "Error: cannot open file " << argv[1] << endl;
        return 1;
    }

    Trie trie;

    cout << "--- Trie:" << endl;
    lexfile >> trie;
    cout << trie;

    cout << "--- Entries:" << endl;
    dump_entries();

    return 0;
}
