// AVL-trie routines
//
// Copyright 2001, KUN.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Library General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

// $Id: avltrie.cc,v 1.13 2003/11/26 22:15:13 pspiertz Exp $

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

using namespace std;
#include <string>
#include <vector>
#include <stddef.h>
#include <iostream>
#include <iomanip>
#include "lexfileio.h"
#include "key.h"
#include "entry.h"
#include "lexentrylistidx.h"
#include "avltrie.h"

//------------------------------------------------------------------------------
// Balancing stuff:
//------------------------------------------------------------------------------

// Mirror a value around 1/2: 0 becomes 1 and 1 becomes 0; any other value v
// yields 1 - v.
int inverse(int balance)
{
    const int mirrored = 1 - balance;
    return mirrored;
}

//------------------------------------------------------------------------------
// Function:
//	 AvlNode::AvlNode(const char* str, EntryList& entries)
//
// Description:
//	Construct a new trie node. The head character of str is stored
//	as the key. If str has a tail, the node's value points to a new
//	trie in which that tail is entered recursively. If str has no
//	tail (end-of-string), a copy of entries is assigned to the
//	node's value.
//------------------------------------------------------------------------------

AvlNode::AvlNode(const char* str, EntryList& entries)
{
    // A new node starts out as an unmarked, balanced leaf.
    sons[Left] = 0;
    sons[Right] = 0;
    marker = 0;
    balance = Balanced;
    key = head(str);

    // The value is either the entry list itself (end of string) or a
    // sub-trie holding the remainder of the string.
    const char* const rest = tail(str);
    if (!rest) {
        val.entries = new EntryList(entries);
        return;
    }

    val.next = new AvlTrie();
    val.next->enter(rest, entries);
}

// Look up str, starting in the tree rooted at this node.
// Returns the entry list stored for str, or 0 if str is not present.
EntryList*
AvlNode::contains(const char* str) const
{
    for (AvlNode* cur = (AvlNode*)this; cur != 0; ) {
        const int order = compare(cur->get_key(), head(str));

        if (order > 0) {
            cur = cur->sons[Left];
            continue;
        }
        if (order < 0) {
            cur = cur->sons[Right];
            continue;
        }

        // Head character matched: either we are done, or the search
        // continues with the tail in the trie hanging off this node.
        str = tail(str);
        if (!str) {
            return cur->get_entries();
        }
        cur = cur->next()->get_node();
    }

    return 0;
}

// Single rotation: lift the son in direction dir above this node.
// Returns the new top of the subtree (this node itself if there is no
// son in that direction).
AvlNode*
AvlNode::rotate(int dir)
{
    AvlNode* const pivot = sons[dir];
    if (!pivot) {
        return this;
    }

    const int opposite = inverse(dir);

    // The pivot's inner subtree moves over to this node, and this node
    // becomes the pivot's son on the opposite side.
    sons[dir] = pivot->sons[opposite];
    pivot->sons[opposite] = this;

    return pivot;
}

//------------------------------------------------------------------------------
// Function:
//	AvlNode*
//	AvlNode::enter(const char* str,
//	               EntryList& entries,
//	               unsigned& size)
//
// Description:
//	Enter str with entries to trie node, while keeping trie balanced.
//	If a new node is allocated, size is incremented. If a duplicate
//	str is found, a copy of entries is appended to the old entries.
//
// Return value:
//	Pointer to new top node of trie.
//------------------------------------------------------------------------------

AvlNode*
AvlNode::enter(const char* str, EntryList& entries, unsigned& size)
{
    // Overview:
    //  1. Walk down from this node comparing keys with the head of str,
    //     recording in each visited node the direction taken (prev_dir).
    //  2. If the head is already present, hand the tail of str to that
    //     node's next trie (or join the entry lists at end-of-string) and
    //     return without touching the tree shape.
    //  3. Otherwise link a new leaf, update the balance factors below the
    //     critical node, and rotate at the critical node if needed.

    int cmp, dir = 0;
    AvlNode* node = this;
    AvlNode* father = 0;
    // Starts as the root; replaced by every node on the path whose
    // balance is not Balanced, so it ends up as the deepest such node.
    AvlNode* critical = this;
    AvlNode* critical_father = 0;

    // Find node with key or enter key and entries in new leaf.
    // For rebalancing we need the critical node, i.e.
    // the last unbalanced node on the search path.

    KeyType key = head(str);
    while (node) {
        if (node->balance != Balanced) {
            critical = node;
            critical_father = father;
        }

        cmp = compare(node->get_key(), key);
        if (cmp < 0) {
            dir = Right;
        } else if (cmp > 0) {
            dir = Left;
        } else {
            // Duplicate found: the head character already exists at this
            // level. No node is added here, so size is left unchanged and
            // no rebalancing is needed.
            if ((str = tail(str))) {
                node->next()->enter(str, entries);
            } else {
                node->get_entries()->join_entrylists(entries);
            }

            return this;
        }

        father = node;
        // Remember which way the search left this node; read back below
        // when updating balance factors and when re-linking after rotation.
        node->prev_dir = dir;
        node = node->sons[dir];
    }

    // The search fell off the tree below father in direction dir.
    AvlNode* inserted = new AvlNode(str, entries);
    father->sons[dir] = inserted;
    size++;

    // Set balance factors of previously balanced nodes
    // on path between critical node and new node to
    // direction of the new node.

    int critical_dir = critical->prev_dir;
    node = critical->sons[critical_dir];
    while (node != inserted) {
        dir = node->prev_dir;
        node->balance = dir;
        node = node->sons[dir];
    }

    // Maybe rebalance tree at critical node, resulting in a new tree.
    // Three cases: the critical node was balanced (it now leans towards
    // the insertion), it leaned the other way (it is now balanced), or it
    // already leaned towards the insertion (rotation required).

  if (critical->balance == Balanced) {
    critical->balance = critical_dir;
  } else if (critical->balance != critical_dir) {
    critical->balance = Balanced;
  } else {
      // Rebalance:
        AvlNode* critical_son = critical->sons[critical_dir]; 
        if (critical_son->balance == critical->balance) {
            // Son leans the same way as the critical node:
            // a single rotation restores balance for both.
            critical->balance = Balanced;
            critical_son->balance = Balanced;
            node = critical->rotate(critical_dir);
        } else {
            // Son leans the other way: double rotation around the
            // grandson on the inner side. The new balance factors of the
            // critical node and its son depend on which way that grandson
            // leaned (rbal) before the rotation.
            int pbal = critical->balance;
            int rbal = critical_son->sons[inverse(critical_dir)]->balance; 
            critical->balance = pbal == rbal ? inverse(rbal) : Balanced;
            critical_son->balance = pbal == inverse(rbal) ? inverse(rbal) : Balanced;
            critical_son->sons[inverse(critical_dir)]->balance = Balanced;
            critical->sons[critical_dir]= critical_son->rotate(inverse(critical_dir));
            node = critical->rotate(critical_dir);
        }

    // If the critical node has a father, the critical node
    // is replaced with the new root of the subtree.
    // Otherwise the critical node was the root of the whole tree and the
    // rotated subtree root is returned as the new root.

        if (critical_father) {
            dir = critical_father->prev_dir;
            critical_father->sons[dir] = node;
        } else {
            return node;
        }
    }

    return this;
}

// Print the tree rooted at this node to os, one key per line in iteration
// order, indented by `indent` columns. Keys that have a tail are followed by
// their sub-trie (indented one column further); keys without a tail are
// followed by the index under which their entry list was registered in
// lex_entry_list_Idx.
void
AvlNode::print(ostream& os, unsigned indent,
               LexEntryListIdx *lex_entry_list_Idx) const
{
    AvlNodeIter walker(this);

    for (AvlNode* cur = walker.next(); cur != 0; cur = walker.next()) {
        os << setw(indent) << "";

        const bool is_leaf = !has_tail(cur->get_key());
        os << text(cur->get_key());

        if (is_leaf) {
            const unsigned idx =
                lex_entry_list_Idx->add_entries(*cur->get_entries());
            os << " --> " << idx << endl;
            continue;
        }

        os << endl;
        cur->next()->print(os, indent + 1, lex_entry_list_Idx);
    }
}

// Print this trie to os; an empty trie prints nothing.
void
AvlTrie::print(ostream& os, unsigned indent,
               LexEntryListIdx *lex_entry_list_Idx) const
{
    if (!node) {
        return;
    }
    node->print(os, indent, lex_entry_list_Idx);
}

//------------------------------------------------------------------------------
// Function: void AvlNode::delete_sons()
//
// Description:
//	 Delete sons on postorder visit using DSW marking.
//------------------------------------------------------------------------------

void
AvlNode::delete_sons()
{
    // Traverses the tree without recursion or an explicit stack: the way
    // back up is kept in the nodes themselves by temporarily overwriting
    // sons[] links, and the per-node mark tells whether the left subtree
    // has already been handled. On the post-order visit of a node its value
    // (next trie or entry list) and both of its son nodes are deleted.
    // The node this is called on is not deleted here.

    AvlNode *node = this;   // subtree currently being entered / just left
    AvlNode *stack = 0;     // top of the chain of ancestors still in progress
    int     visit_left;

    // Continue while there is an unvisited subtree to descend into
    // (node non-null and unmarked) or ancestors left to finish.
    while ((visit_left = node && !node->is_marked()) || stack) {
        if (visit_left) {
            // preorder visit node; pushleft
            // The left link is reused to point at the previous stack top.
            AvlNode* tmp = node->sons[Left];
            node->sons[Left] = stack;
            stack = node;
            node = tmp;
        } else {
            // node is either null (empty subtree) or a marked node whose
            // subtree is finished; clear its mark before moving on.
            if (node) {
                node->unmark();
            }

            if (!stack->is_marked()) {
                // inorder visit stack; leftright
                // Left subtree done: restore the left link, move the
                // ancestor link into the right link, descend to the right.
                stack->mark();
                AvlNode* tmp = stack->sons[Left];
                stack->sons[Left] = node;
                node = stack->sons[Right];
                stack->sons[Right] = tmp;
            } else {
                // popright; post order visit node
                // Right subtree done: restore the right link and pop.
                AvlNode* tmp = stack->sons[Right];
                stack->sons[Right] = node;
                node = stack;
                stack = tmp;

                // Release the node's value; which kind it holds is decided
                // by whether its key has a tail.
                if (has_tail(node->get_key())) {
                    delete node->next();
                } else {
                    delete node->get_entries();
                }

                // Both sons have already had their own post-order visit,
                // so the son nodes themselves can go now. The sons[] links
                // are left pointing at the deleted nodes.
                delete node->sons[Left];
                delete node->sons[Right];
            }
        }
    }

    // The loop ends with node back at the starting node, still marked.
    node->unmark();
}

// Release the whole tree: first everything below the root, then the root.
AvlTrie::~AvlTrie()
{
    if (!node) {
        return;
    }
    node->delete_sons();
    delete node;
}

// Bookkeeping used while generating the flat representation of a trie:
// collects the output buffers ("rownodes") in output order, tracks the total
// number of bytes claimed so far, and forwards entry lists to the entry-list
// index.
//
// Ownership: every buffer handed to claim_outspace() must have been allocated
// with new char[]. It is owned by this object from then on and released by
// the destructor, unless forget_outspace() hands it back to the caller first.
struct AvlTrie::AuxWritingData
{
    typedef pair<char *, size_t> OutRownodePtrSiz;
    typedef vector<OutRownodePtrSiz> OutRownodes;

    // Strict weak ordering on rownodes: shorter before longer, equal sizes
    // ordered by byte content.
    class OutRownodeLess
    {
      public:
	bool operator() (const OutRownodePtrSiz& orw1,
			 const OutRownodePtrSiz& orw2) const
	{
	    if (orw1.second != orw2.second) {
		return orw1.second < orw2.second;
	    }
	    // Sizes are equal here, so either one may serve as the length.
	    return memcmp(orw1.first, orw2.first, orw2.second) < 0;
	}
    };
    typedef map<OutRownodePtrSiz, size_t, OutRownodeLess> OutRownodeMap;

	AuxWritingData(LexEntryListIdx *lelI):
	    lex_entry_list_Idx(lelI), out_total_nbytes(0) {}

	// Free every buffer still registered. Without this, all rownode
	// buffers leaked once the writer went out of scope.
	~AuxWritingData()
	{
	    for (OutRownodes::iterator rni = out_rownodes.begin();
				rni != out_rownodes.end(); ++rni) {
		delete [] rni->first;
	    }
	}

	// Total number of bytes claimed (and not forgotten) so far.
	size_t get_out_total_nbytes() const
	    { return out_total_nbytes; }

	// Register entries with the entry-list index; returns whatever
	// LexEntryListIdx::add_entries returns for them.
	unsigned lex_entry_list_Idx_add_entries(EntryList& entries)
	    { return lex_entry_list_Idx->add_entries(entries); }

	// Append buffer o_buf of o_siz bytes to the output and take
	// ownership of it. Returns the offset at which it will be written.
	size_t claim_outspace(char *o_buf, size_t const o_siz)
	{
	    out_rownodes.push_back(make_pair(o_buf, o_siz));
	    out_total_nbytes += o_siz;
	    return out_total_nbytes - o_siz; // offset
	}

	// Undo the most recent claim_outspace(), which must have returned
	// `offset`; terminates the program otherwise. Ownership of the
	// buffer returns to the caller.
	void forget_outspace(size_t const offset)
	{
	    if (out_rownodes.empty()) {
		cerr << "Abort: forget_outspace upto offset " << offset
		     << ", but no outspace was claimed" << endl;
		exit(2);
	    }
	    OutRownodePtrSiz o_p = out_rownodes.back();
	    if (offset != out_total_nbytes - o_p.second) {
		cerr << "Abort: forget_outspace upto offset " << offset
		     << ", should be " << out_total_nbytes
		     << " - " << o_p.second << endl;
		exit(2);
	    }
	    out_total_nbytes = offset;
	    out_rownodes.pop_back();
	}

	// Offset recorded for a rownode with the same size and content, or
	// 0 if none is recorded. Note that 0 is also what a rownode recorded
	// at offset 0 yields.
	size_t find_rownode(char *r_buf, size_t const r_siz) const
	{
	    OutRownodeMap::const_iterator const r_it
		= out_map.find(make_pair(r_buf, r_siz));
	    return r_it != out_map.end() ? r_it->second : 0;
	}

	// Record rownode (r_buf, r_siz) at offset r_off unless an identical
	// one is already recorded. Returns the offset that is on record
	// afterwards: the earlier one if it existed, r_off otherwise.
	size_t insert_rownode(char *r_buf, size_t const r_siz, size_t r_off)
	{
	    pair<OutRownodeMap::iterator, bool> insresult
		= out_map.insert(make_pair(make_pair(r_buf, r_siz), r_off));
	    return insresult.first->second;	// offset found or r_off
	}

	// Write all claimed buffers to os, in the order they were claimed.
	void write_out_data(ostream& os) const
	{
	    for (OutRownodes::const_iterator rni = out_rownodes.begin();
				rni != out_rownodes.end(); ++rni) {
		os.write(rni->first, rni->second);
	    }
	}
    private:
	// Not copyable: a copy would free the same buffers a second time.
	// Declared but intentionally not defined.
	AuxWritingData(const AuxWritingData&);
	AuxWritingData& operator=(const AuxWritingData&);

	LexEntryListIdx *lex_entry_list_Idx;
	OutRownodes out_rownodes;	// owned buffers, in output order
	OutRownodeMap out_map;		// content -> offset; keys point into buffers
	size_t out_total_nbytes;
};

typedef unsigned * offout_t;

// Round v up to the next multiple of sizeof(offout_t); multiples are
// returned unchanged.
static inline unsigned align(unsigned v)
{
    const unsigned unit = sizeof(offout_t);
    const unsigned excess = v % unit;

    if (excess == 0) {
        return v;
    }
    return v + (unit - excess);
}

//------------------------------------------------------------------------------
//PARTIALLY OUTDATED COMMENT:
// Description:
//	Write entries identified by entries in trie to stream os.
//	Each entry is preceded by its offset in the flat representation
//	of the trie. Parameter pos is the offset of the current trie.
//	The flat representation is as follows: l = path length,
//	n = nr. of branches, l chars on path, n branching chars, align,
//	n pointers to branches.
//
// Side effects:
//	In each trie, the offset pos is stored for use when generating
//	the trie itself.
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Function:
//	AvlTrie::generate_trie(AuxWritingData *aux_writing_data)
// Description:
//	Store flat representation of trie in output buffer.
//      A sequence of tries of size 1 is called a path, and will be
//	represented as a string with its length (path compression).
//	Each trie is stored as follows: length of path = l, nr of
//	branches = n, l chars on path, n chars of branches, align,
//	n offsets of next tries and entries counters.
// Return value:
//	Offset of this trie (a.k.a. rownode) or an identical one
//	(if one was already put at a lower offset).
//------------------------------------------------------------------------------

// The length of a path is stored in a single byte of the rownode header
// (flat[0]), so one rownode can carry at most 255 path characters. With the
// former limit of 256, a full path had its length byte wrap around to 0.
// A longer chain of single-branch tries simply continues in the next rownode.
#define MAX_TRIE_PATH_LEN 255

// Build the flat representation (rownode) of this trie and, recursively, of
// all tries below it, registering each buffer with aux_writing_data.
//
// Buffer layout: [0] path length, [1] number of branches, path characters,
// branch characters, zero padding up to the aligned slot area, then one slot
// of sizeof(offout_t) bytes per branch.
//
// Returns the offset of this trie's rownode in the output.
size_t
AvlTrie::generate_trie(AuxWritingData *aux_writing_data) const
{
    const AvlTrie* trie;

    // Store characters on path: follow tries that have exactly one node whose
    // key has a tail. Afterwards `trie` is the first trie not folded into
    // the path; its nodes become the branches of this rownode.
    char path_data[MAX_TRIE_PATH_LEN];
    unsigned path_len = 0;
    for (trie = this;
         trie->get_size() == 1 && has_tail(trie->get_node()->get_key())
                               && path_len < MAX_TRIE_PATH_LEN;
         trie = trie->get_node()->next()) {
        path_data[path_len++] = trie->get_node()->get_key();
    }

    // Calculate offset of pointers.
    unsigned const entry_pos = align(2 + path_len + trie->get_size());
    unsigned const data_size = entry_pos + sizeof(offout_t) * trie->get_size();

    // Zero-initialised: the alignment padding and any slot bytes that are
    // not written below must not reach the output file as uninitialised
    // heap content.
    char * flat = new char[data_size]();
    size_t const offset = aux_writing_data->claim_outspace(flat, data_size);

    // Store path length and number of branches.
    // NOTE(review): the branch count is stored in one byte as well; whether
    // a trie can have more than 255 branches is not visible here.
    flat[0] = (char)path_len;
    flat[1] = (char)trie->get_size();
    memcpy(flat + 2, path_data, path_len);
    unsigned pos = 2 + path_len;

    offout_t entry_ptr = (offout_t)(flat + entry_pos);

    // Store characters and pointers of branches.
    // For leafs, store entry counter.
    AvlNodeIter	iter(trie->get_node());

    while (AvlNode* next = iter.next()) {
        flat[pos++] = next->get_key();

        if (has_tail(next->get_key())) {
            // The sub-trie claims its output space after this rownode did.
            *entry_ptr++ = next->next()->generate_trie(aux_writing_data);
        } else {
            EntryList* entries = next->get_entries();
            *entry_ptr++
		= aux_writing_data->lex_entry_list_Idx_add_entries(*entries);
        }
    }

    // Bytes between the last branch character and entry_pos are padding;
    // they are already zero.

    // Sharing of identical rownodes is currently switched off, so the offset
    // just claimed is always the result. To switch it on, initialise
    // found_off from
    //     aux_writing_data->insert_rownode(flat, data_size, offset)
    // instead.
    size_t const found_off = offset;
    if (found_off != offset) {
	// already existed
	aux_writing_data->forget_outspace(offset); // exits if not last segment
	delete []flat;
    }
    return found_off;
}

// Write the flat representation of this trie to os.
//
// Output: a size field of sizeof(long) bytes holding the number of trie
// bytes, followed by the trie bytes themselves. For an empty trie only the
// size field (value 0) is written.
//
// Parameters:
//	os    stream to write to.
//	lelI  entry-list index passed on to the rownode generator.
void
AvlTrie::write_output(ostream& os, LexEntryListIdx *lelI) const
{
    AuxWritingData awd(lelI);

    //write_timed(" generate_trie");
    // unsigned trie_sz to keep format compatible, TODO make it size_t
    unsigned trie_sz = 0;
    if (node) {
        generate_trie(&awd);
        trie_sz = awd.get_out_total_nbytes();
    }
    may_log_writing_size(trie_sz, " trie bytes to file");

    // The size field is sizeof(long) bytes wide. Writing that many bytes
    // straight from the `unsigned` variable read past the end of it on
    // platforms where long is wider than unsigned, putting indeterminate
    // bytes into the file. Widen the value first; the number of bytes
    // written is unchanged.
    unsigned long const size_field = trie_sz;
    os.write(reinterpret_cast<const char*>(&size_field), sizeof(size_field));

    if (node) {
	awd.write_out_data(os); // Write flat representation of trie.
    }
}

// Enter key with its entries into this trie. The first key creates the root
// node; later keys go through the root, which may be replaced by a new root
// as a result of rebalancing.
void
AvlTrie::enter(const char* key, EntryList& entries)
{
    if (!node) {
        node = new AvlNode(key, entries);
        ++size;
        return;
    }

    node = node->enter(key, entries, size);
}

// Look up key in this trie. Returns its entry list, or 0 if the trie is
// empty or the key is not present.
EntryList*
AvlTrie::contains(const char* key) const
{
    return node ? node->contains(key) : 0;
}

//------------------------------------------------------------------------------
// AvlNodeIter class routines:
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Function:
//	AvlNode* AvlNodeIter::next()
//
// Description:
//	Return pointer to the next node on each inorder visit, using
//	DSW marking; return 0 when the traversal is finished.
//------------------------------------------------------------------------------

AvlNode* AvlNodeIter::next()
{
    // Resumable traversal step. The iterator keeps two pointers between
    // calls: `node` (subtree being entered or just left) and `stack` (chain
    // of ancestors still in progress). The way back up is stored in the
    // nodes themselves by temporarily overwriting sons[] links, so while a
    // traversal is under way the links of nodes on the current path do not
    // describe the real tree; they are restored as the traversal leaves
    // each node.

    int visit_left;

    // Continue while there is an unvisited subtree to descend into
    // (node non-null and unmarked) or ancestors left to finish.
    while ((visit_left = node && !node->is_marked()) || stack) {
        if (visit_left) {
            // preorder visit node; pushleft
            // The left link is reused to point at the previous stack top.
            AvlNode* tmp = node->sons[AvlNode::Left];
            node->sons[AvlNode::Left] = stack;
            stack = node;
            node = tmp;
        } else {
            // node is either null (empty subtree) or a marked node whose
            // subtree is finished; clear its mark before moving on.
            if (node) {
                node->unmark();
            }

            if (!stack->is_marked()) {
                // inorder visit stack; leftright
                // Left subtree done: restore the left link, park the
                // ancestor link in the right link, and hand this node to
                // the caller. The next call continues with its right
                // subtree.
                stack->mark();
                AvlNode* tmp = stack->sons[AvlNode::Left];
                stack->sons[AvlNode::Left] = node;
                node = stack->sons[AvlNode::Right];
                stack->sons[AvlNode::Right] = tmp;
                return stack;
            } else {
                // popright; post order visit node
                // Right subtree done: restore the right link and pop.
                AvlNode* tmp = stack->sons[AvlNode::Right];
                stack->sons[AvlNode::Right] = node;
                node = stack;
                stack = tmp;
            }
        }
    }

    // Traversal finished: clear the mark left on the last node, if any.
    if (node) {
        node->unmark();
    }

    return 0;
}
