// LexAffixNameList class implementation
//
// Copyright 2001, KUN.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Library General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

// $Id: lexaffixnamelist.cc,v 1.11 2003/11/26 22:15:13 pspiertz Exp $

using namespace std;
#include <iostream>
#include "lexfileio.h"
#include "lexaffixnamelist.h"

void
LexAffixNameList::dump(void) const
{
    LexAffixNameMap::const_iterator i = names.begin();

    cout << "---- LexAffixNameList::dump" << endl;
    cout << "size == " << size() << endl;

    while (i != names.end()) {
        cout << "\t" << i->first << " -> <";
        cout << (i->second).index << ", ";
        cout << (i->second).bitset << ", ";
        if (i->second.is_nonterminal) { cout << "nonterminal, "; } else { cout << "terminal, "; }
        cout << "<";
        set<string>::iterator j = i->second.lhs_list.begin();
        if (j != i->second.lhs_list.end()) {
            cout << *j;
            ++j;
            while (j != i->second.lhs_list.end()) { cout << ", " << *j; ++j; }
        }
        cout << "> >" << endl;

        ++i;
    }

    cout << "----" << endl;
}

// Write the affix name table to `os` using the write_lex_* helpers.
//
// The value of size() is written first (together with a log text), followed
// by one record per affix in ascending index order.  Each record consists of
// the affix name, its bitset, its nonterminal flag, the number of lhs names,
// and then, for every lhs name, the value get_index() returns for it.
void
LexAffixNameList::write_output(ostream& os) const
{
#ifdef DEBUG
    cerr << "---> LexAffixNameList size == " << size() << endl;
#endif
    write_lex_size_and_log(os, size(), " set affixes to file");

    for (unsigned affix_nr = 0; affix_nr < size(); ++affix_nr) {
        // Entries are stored keyed by name, so locate the one carrying this
        // index; get_by_index() aborts the program if there is none.
        const LexAffixNameMap::const_iterator affix = get_by_index(affix_nr);
        const set<string>& lhs_names = affix->second.lhs_list;

        write_lex_string(os, affix->first.c_str());
#ifdef DEBUG
        cerr << "writing affix \"" << affix->first << "\" (" << affix_nr << ")" << endl;
#endif
        write_lex_set(os, affix->second.bitset);
        write_lex_bool(os, affix->second.is_nonterminal);
        write_lex_size(os, lhs_names.size());

        for (set<string>::const_iterator lhs = lhs_names.begin();
             lhs != lhs_names.end();
             ++lhs) {
#ifdef DEBUG
            cerr << "\tasking for index of \"" << *lhs << "\"" << endl;
#endif
            // Pass a modifiable copy of the name; presumably get_index()
            // takes a non-const string reference -- confirm in the header.
            string lhs_name = *lhs;
            unsigned lhs_index = get_index(lhs_name);
#ifdef DEBUG
            cerr << "\tgives " << lhs_index << endl;
#endif
            write_lex_index(os, lhs_index);
        }
    }
}

// Find the entry in `names` whose stored index equals `idx`.
//
// Returns an iterator to the first matching entry (in map iteration order).
// If no entry carries that index, this is treated as an internal error: a
// message is printed to stderr, the table is dumped to stdout, and the
// program terminates with exit status 1.
LexAffixNameMap::const_iterator
LexAffixNameList::get_by_index(const unsigned idx) const
{
    for (LexAffixNameMap::const_iterator pos = names.begin();
         pos != names.end();
         ++pos) {
        if (pos->second.index != idx) {
            continue;
        }

        return pos;
    }

    cerr << "internal error in LexAffixNameList::get_by_index: unknown index " << idx << endl;
    dump();
    exit(1);

    // Not reached; only present to keep the compiler from warning about a
    // missing return value.
    return names.end();
}

// Return the index of the affix set named `n`, registering it if necessary.
//
// If `n` is already present in `names`, its stored index is returned.
// Otherwise `n` is split on '|' and the bitsets of all component names that
// are present in `names` are OR-ed together.  A new entry is then stored
// under `n` with that combined bitset, is_nonterminal == false, a
// default-constructed lhs_list, and an index equal to the number of entries
// in `names` before the insertion; that index is returned.
//
// Components are looked up with find() rather than operator[]: operator[]
// would default-insert an entry for every unknown component (and, for a name
// without '|', for `n` itself), growing `names` before the new index is
// taken from names.size() and leaving behind entries whose index was never
// assigned.  Unknown components now contribute no bits and `names` is only
// modified by the single insertion of `n`.
//
// NOTE(review): the accumulator is a `long` as in the original code; confirm
// that this is wide enough for the entry's bitset type.
unsigned
LexAffixNameList::get_index_of_set(string& n)
{
    LexAffixNameMap::const_iterator known = names.find(n);

    if (known != names.end()) {
        return known->second.index;
    }

    long bitset = 0;
    string::size_type start = 0;

    // Walk over the '|'-separated components of n without copying or
    // mutating the whole string for each component.
    for (;;) {
        const string::size_type barpos = n.find('|', start);
        const string aff = (barpos == string::npos)
                               ? n.substr(start)
                               : n.substr(start, barpos - start);

        LexAffixNameMap::const_iterator part = names.find(aff);
        if (part != names.end()) {
            bitset |= part->second.bitset;
        }
#ifdef DEBUG
        else {
            cerr << "unknown affix \"" << aff << "\" in set \"" << n << "\"" << endl;
        }
#endif

        if (barpos == string::npos) {
            break;
        }
        start = barpos + 1;
    }

    unsigned the_idx = names.size();
#ifdef DEBUG
    cerr << "adding \"" << n << "\" with new index " << the_idx <<endl;
    assert(the_idx == names_length);
#endif
    LexAffixNameEntry e;
    e.index = the_idx;
    e.bitset = bitset;
    e.is_nonterminal = false;
//    e.lhs_list.insert("<anonymous>");
    names[n] = e;

    return the_idx;
}

// Report whether the affix table in `names` differs from the set affixes
// stored in the previously generated lexicon `old_lex`.
//
// Returns true as soon as one of the following differences is found:
//   - the number of old set affixes, not counting the names that contain
//     '|' (see calculate_nr_union_sets), differs from names.size();
//   - for some entry, looked up in old_lex by its index: the name, the
//     bitset, the nonterminal flag, the number of lhs names, or the set of
//     lhs names differs.
// Returns false when none of these differ.
//
// NOTE(review): get_index_of_set() adds names containing '|' to `names`;
// presumably this comparison runs before any such name has been added --
// confirm against the caller.
bool LexAffixNameList::has_changed(LEXICON* old_lex)
{
    size_t old_nr_aff = lexicon_get_nr_setaffixes(old_lex);
    old_nr_aff -= calculate_nr_union_sets(old_lex);
    if (old_nr_aff != names.size()) {
#ifdef CMP_DEBUG
        cerr << "nr of affixes have changed from " << old_nr_aff;
        cerr << " to " << names.size() << endl;
#endif
        return true;
    }

    for (LexAffixNameMap::const_iterator i = names.begin();
         i != names.end();
         ++i) {
        // Bind by reference: copying the entry would also copy its whole
        // lhs_list on every iteration.
        const LexAffixNameEntry& e = i->second;

        string old_name = lexicon_get_setaffix_name(old_lex, e.index);
        if (old_name != i->first) {
#ifdef CMP_DEBUG
            cerr << "name of affix " << e.index << " has changed from ";
            cerr << old_name << " to " << i->first << endl;
#endif
            return true;
        }

        SET old_bitset = lexicon_get_setaffix_bitset(old_lex, e.index);
        if (old_bitset != e.bitset) {
#ifdef CMP_DEBUG
            cerr << "bitset of affix " << e.index << " has changed from ";
            cerr << old_bitset << " to " << e.bitset << endl;
#endif
            return true;
        }

        // Normalize both flags to bool so that any non-zero gboolean counts
        // as "true"; the flags differ exactly when one is set and the other
        // is not.
        gboolean old_is_nont = lexicon_get_setaffix_nont_flag(old_lex, e.index);
        const bool was_nont = old_is_nont ? true : false;
        const bool is_nont = e.is_nonterminal ? true : false;
        if (was_nont != is_nont) {
#ifdef CMP_DEBUG
            cerr << "affix " << e.index << " has changed from/to nont" << endl;
#endif
            return true;
        }

        size_t old_nr_lhsses = lexicon_get_setaffix_nr_lhsses(old_lex, e.index);
        if (old_nr_lhsses != e.lhs_list.size()) {
#ifdef CMP_DEBUG
            cerr << "affix " << e.index << " had " << old_nr_lhsses;
            cerr << " lhsses and now has " << e.lhs_list.size() << endl;
#endif
            return true;
        }

        // The sizes are equal at this point, so the sets only need to be
        // compared when there is at least one lhs name.
        if (old_nr_lhsses) {
            set<string> old_lhsses;

            for (unsigned lhs_nr = 0; lhs_nr < old_nr_lhsses; ++lhs_nr) {
                old_lhsses.insert(lexicon_get_setaffix_lhs_name(old_lex,
                                                                e.index,
                                                                lhs_nr));
            }

            if (e.lhs_list != old_lhsses) {
#ifdef CMP_DEBUG
                cerr << "affix " << e.index << " has different lhsses" << endl;
#endif
                return true;
            }
        }
    }

    return false;
}

// Count the set affixes in `old_lex` whose name contains a '|' character.
//
// Every set affix index from 0 up to (but excluding) the number reported by
// lexicon_get_nr_setaffixes() is inspected.  Returns the number of names in
// which a '|' was found.
size_t LexAffixNameList::calculate_nr_union_sets(LEXICON* old_lex)
{
    const size_t total = lexicon_get_nr_setaffixes(old_lex);
    size_t res = 0;

    for (size_t affix_nr = 0; affix_nr < total; ++affix_nr) {
        const string affix_name = lexicon_get_setaffix_name(old_lex, affix_nr);

        if (affix_name.find('|') == string::npos) {
            continue;
        }

        ++res;
    }

#ifdef CMP_DEBUG
    cerr << "found " << res << " affixes names containing |" << endl;
#endif

    return res;
}
