/*
   File: posmemo.c
   Positive memoization routines.

   Copyright 2005 Radboud University of Nijmegen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Library General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   CVS ID: "$Id: posmemo.c,v 1.31 2005/08/24 10:17:07 olafs Exp $"
*/

/* standard includes */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <stdio.h>

/* libabase includes */
#include <abase_error.h>
#include <abase_memalloc.h>

/* local includes */
#include "rtscode.h"
#include "rtslex.h" 
#include "posmemo.h"

/* DB_RTS(x) expands to x only when DEBUG_RTS is defined; otherwise it
   expands to nothing, so debug-only code disappears from the build. */
#ifdef DEBUG_RTS
#define DB_RTS(x) x
#else
#define DB_RTS(x)
#endif

/*
   Memory management code for posmemo
   Actual management has not yet been implemented
*/
/*
   Obtain s bytes for posmemo administration. The request is forwarded
   to abs_malloc, tagged with the name of this routine.
*/
static void* posmemo_getmem (size_t s)
{
  void *block = abs_malloc (s, "posmemo_getmem");

  return block;
}

/*
   Release a block that was obtained through posmemo_getmem by handing
   it to abs_free, tagged with the name of this routine.
*/
static void posmemo_freemem (void* ptr)
{
  abs_free (ptr, "posmemo_freemem");
}

/*
   Start-up hook of the posmemo memory management.
   Currently a no-op: memory management has not been implemented yet.
*/
void posmemo_init()
{
}

/*
   Shut-down hook of the posmemo memory management.
   Currently a no-op: memory management has not been implemented yet.
*/
void posmemo_done()
{
}

/*
   Define access functions for posmemo structures.
   For each position in the input trellis a positive memo for each
   syntax nonterminal is maintained (using a vector indexed by nont nr).

   Two special values are used to denote the unknown and the blocked
   positive memo.
*/
/*
   Yield the address of the posmemo slot of nonterminal n in the
   pos_memos vector of state i. The state itself must not be null.
*/
static inline PosMemo* posmemo_fetch (StateIndicator i, unsigned n)
{
  PosMemo *slot;

  assert (i);
  slot = i -> pos_memos + n;
  return slot;
}

/*
   Bring one slot of a posmemo vector into its initial state by
   marking it as unknown.
*/
void posmemo_init_table_entry (PosMemo* entry)
{
  entry[0] = POSMEMO_UNKNOWN;
}

/*
   Return 1 when the posmemo of nonterminal nont_nr at input_state
   still carries the unknown marker, 0 otherwise.
*/
int posmemo_is_unknown (StateIndicator input_state, unsigned nont_nr)
{
  PosMemo *slot = posmemo_fetch (input_state, nont_nr);

  if (slot == NULL)
    return 0;
  return (*slot == POSMEMO_UNKNOWN);
}

/*
   Return 1 when the posmemo of nonterminal nont_nr at input_state is
   anything but the unknown marker (a blocked posmemo counts as known),
   0 otherwise.
*/
int posmemo_is_known (StateIndicator input_state, unsigned nont_nr)
{
  PosMemo memo = *posmemo_fetch (input_state, nont_nr);

  return (memo == POSMEMO_UNKNOWN) ? 0 : 1;
}

/*
   Return 1 when the posmemo of nonterminal nont_nr at input_state
   carries the blocked marker, 0 otherwise.
*/
int posmemo_is_blocked (StateIndicator input_state, unsigned nont_nr)
{
  PosMemo memo = *posmemo_fetch (input_state, nont_nr);

  return (memo == POSMEMO_BLOCKED) ? 1 : 0;
}

/*
   Decide whether nonterminal nont_nr at input_state is blocked when
   only penalties up to penlevel are acceptable.

   Returns 1 when the posmemo is blocked, or when the penalty of the
   first production in the list exceeds penlevel; 0 otherwise.
   An unknown posmemo is reported through abs_bug
   (NOTE(review): abs_bug presumably does not return -- confirm).
*/
int posmemo_is_blocked_for_penlevel (StateIndicator input_state, unsigned nont_nr, long penlevel)
{
  PosMemo *slot = posmemo_fetch (input_state, nont_nr);

  if (*slot == POSMEMO_BLOCKED)
    return 1;

  if (*slot == POSMEMO_UNKNOWN)
    {
      abs_bug ("posmemo_is_blocked_for_penlevel", "Posmemo is unknown");
    }

  /* Only the head of the list needs to be inspected here */
  return ((*slot) -> penalty > penlevel) ? 1 : 0;
}

/*
   Return 1 when the posmemo of nonterminal nont_nr at input_state is
   neither the blocked nor the unknown marker, 0 otherwise.
*/
int posmemo_is_unblocked (StateIndicator input_state, unsigned nont_nr)
{
  PosMemo *slot = posmemo_fetch (input_state, nont_nr);

  if (*slot == POSMEMO_BLOCKED)
    return 0;
  if (*slot == POSMEMO_UNKNOWN)
    return 0;
  return 1;
}

/*
   Overwrite the posmemo of nonterminal nont_nr at input_state with
   the unknown marker. Whatever the slot held before is not released.
*/
void posmemo_set_unknown (StateIndicator input_state, unsigned nont_nr)
{
  *posmemo_fetch (input_state, nont_nr) = POSMEMO_UNKNOWN;
}

/*
   Mark the posmemo of nonterminal nont_nr at input_state as blocked.
   The slot is expected to be unknown; any other content is reported
   through abs_bug before the blocked marker is stored.
*/
void posmemo_set_blocked (StateIndicator input_state, unsigned nont_nr)
{
  PosMemo *slot = posmemo_fetch (input_state, nont_nr);

  if (*slot != POSMEMO_UNKNOWN)
    {
      abs_bug ("posmemo_set_blocked", "Posmemo is not unknown");
    }
  *slot = POSMEMO_BLOCKED;
}

/*
   Dumping of posmemo. This code is only used when DEBUG_POSMEMO is set
*/
/*
   Print a summary of a single production, followed by one line for
   each of its formals.

   The summary line shows the address, nonterminal number, penalty,
   number of formals, next state, fail continuation and an 'E' when
   another production is chained through the equiv pointer ('N'
   otherwise). After that every son is shown: as "LEX <address>" when
   its bit is set in the son type bitmap, else as
   "<nont_nr>:<pos of its next state>".

   pm_prod must be a real production, not one of the special markers.
*/
static void posmemo_dump_pmprod (PosMemo pm_prod)
{ unsigned nr_formals = pm_prod -> nr_formals;
  unsigned nr_variables = pm_prod -> nr_variables;
  unsigned nr_sons = pm_prod -> nr_sons;
  /* The son type bitmap is stored directly behind the son slots */
  unsigned *type_bits = (unsigned *)&(pm_prod -> variables [nr_variables + nr_sons]);
  unsigned ix;

  /* %p requires void pointers, hence the casts */
  abs_printf ("PM %p:nont_nr=%u penalty=%ld #formals=%u -> ns=%p (fc=%p) %c ",
	      (void *) pm_prod, pm_prod -> nont_nr, pm_prod -> penalty, nr_formals,
	      (void *) pm_prod -> next_state, (void *) pm_prod -> failcont,
	      (pm_prod -> equiv != NULL)?'E': 'N');
  for (ix = 0; ix < nr_sons; ix++)
     { PMPROD *son = (PMPROD *)(pm_prod -> variables[nr_variables + ix].text_par);
       /* Unsigned mask: shifting a signed 1 into bit 31 is undefined */
       if (type_bits[ix / 32] & (1u << (ix % 32)))
          abs_printf ("LEX %p ", (void *) son);	/* trailing blank separates the sons */
       else abs_printf ("%u:%u ",son -> nont_nr, son -> next_state -> pos);
     };
  abs_printf("\n");
  for (ix = 0; ix < nr_formals; ix++)
     abs_printf("\tformal %u set value: %lu\n", ix, (pm_prod -> variables)[ix].set_par);
}

/*
   Recursively print a production and everything below it as an
   indented tree.

   pm_prod : the production to print (a real production, not one of
	     the special markers)
   indent  : number of blanks written in front of the lines of this
	     production; sons are printed with indent + 3

   For the production itself a summary line is written (the trailing
   character is 'E' when its prime pointer is set, 'N' otherwise),
   then one line per formal. Each son is either a lexical transition
   (its bit is set in the son type bitmap), of which the text is
   shown, or a production, which is dumped by a recursive call.
*/
void posmemo_rdump_pmprod (PosMemo pm_prod, int indent)
{ unsigned nr_formals = pm_prod -> nr_formals;
  unsigned nr_variables = pm_prod -> nr_variables;
  unsigned nr_sons = pm_prod -> nr_sons;
  /* The son type bitmap is stored directly behind the son slots */
  unsigned *type_bits = (unsigned *) &(pm_prod -> variables [nr_variables + nr_sons]);
  unsigned ix;
  int iy;

  for (iy = 0; iy < indent; iy++) abs_printf (" ");
  /* %p requires void pointers, hence the casts */
  abs_message (">PM %p:nont=%s (%u) penalty=%ld #formals=%u -> ns=%p (fc=%p) %c",
	       (void *) pm_prod, nonterm_names[pm_prod -> nont_nr], pm_prod -> nont_nr,
	       pm_prod -> penalty, nr_formals,
	       (void *) pm_prod -> next_state, (void *) pm_prod -> failcont,
	       (pm_prod -> prime != NULL)? 'E': 'N');
  for (ix = 0; ix < nr_formals; ix++)
     { for (iy = 0; iy < indent; iy++) abs_printf (" ");
       abs_message (">F%u: 0x%lx", ix, (pm_prod -> variables)[ix].set_par);
     };
  for (ix = 0; ix < nr_sons; ++ix)
     { PMPROD *son = (PMPROD *)(pm_prod -> variables[nr_variables + ix].text_par);
       /* Unsigned mask: shifting a signed 1 into bit 31 is undefined */
       if (type_bits[ix / 32] & (1u << (ix % 32)))
          { /* Lexical son: the slot holds a transition, not a production */
	    for (iy = 0; iy < indent + 3; iy++) abs_printf (" ");
	    abs_message (">LEX %p: %s", (void *) son, ((Transition *) son) -> text);
	  }
       else posmemo_rdump_pmprod (son, indent + 3);
     };
}

/*
   The following code is the crux of the positive memoization
   We enter this code when a positive memo has been constructed
   for this nonterminal, formals, penalty and next input position.
   The posmemo list is kept sorted by penalty. Ambiguous parses
   are indicated by being a member of the equivalent list. Each
   of the ambiguous parses points to the first recognized
   equivalent parse with the prime pointer

   While adding, a bound may be set to limit the number of parses;
   so while traversing we keep track of the number of parses with
   the same nonterminal, formals and next input position (in effect
   all parses with a different penalty).
*/

/*
   Two productions are completely equal if they have the same formals,
   locals, sons and next_state. The penalty is not inspected here.

   Returns 1 when curr and new_prod are equal, 0 otherwise.

   The number of variables (formals + locals) must be compared as well
   as the number of sons: when the counts differ the two value blocks
   have a different layout and possibly a different size, so a byte
   wise comparison would match locals against sons or run past the end
   of the smaller block.
*/
static int equal_posmemo (PosMemo curr, PosMemo new_prod)
{ size_t nr_values;

  if (curr -> nr_formals != new_prod -> nr_formals) return (0);
  if (curr -> nr_variables != new_prod -> nr_variables) return (0);
  if (curr -> nr_sons != new_prod -> nr_sons) return (0);
  if (curr -> next_state != new_prod -> next_state) return (0);

  /* Without any value the blocks may be absent; nothing left to compare */
  nr_values = (size_t) new_prod -> nr_variables + new_prod -> nr_sons;
  if (nr_values == 0) return (1);

  return (memcmp (curr -> variables, new_prod -> variables,
		  nr_values * sizeof (LXCN_VALUE)) == 0);
}

/*
   Equivalence test for productions: both must have the same number of
   formals, the same next_state and byte wise identical formal values
   (they recognize the same span of input with the same resulting
   formals). Locals and sons play no role.

   Note that two equivalent productions can only be ambiguous when
   they also have the same penalty; that is checked by the caller.

   Returns 1 when curr and new_prod are equivalent, 0 otherwise.
*/
static int equivalent_posmemo (PosMemo curr, PosMemo new_prod)
{
  size_t formal_bytes;

  if (curr -> nr_formals != new_prod -> nr_formals)
    return 0;
  if (curr -> next_state != new_prod -> next_state)
    return 0;

  /* The formals occupy the first nr_formals slots of the value block */
  formal_bytes = new_prod -> nr_formals * sizeof (LXCN_VALUE);
  return (memcmp (curr -> variables, new_prod -> variables, formal_bytes) == 0);
}

/*
   Release one production: first its value block (when present), then
   the production record itself. The comment string is only used in
   the trace message written when DEBUG_POSMEMO is defined.
*/
static void delete_posmemo (PosMemo memo, char *comment)
{
#ifdef DEBUG_POSMEMO
  abs_message ("Deleting %s posmemo %p", comment, memo);
#endif
  if (memo -> variables != NULL)
    {
      posmemo_freemem (memo -> variables);
    }
  posmemo_freemem (memo);
}

/*
   Insert new_prod into the penalty sorted production list of which
   *pms is the head.

   pms      : address of the list head; it is overwritten when new_prod
	      becomes the first production of the list.
   new_prod : the freshly built production. This routine takes over
	      ownership: the production either ends up in the list or
	      is released through delete_posmemo.

   The list is processed in three phases:
   1) All productions with a lower penalty are skipped while counting
      those that are equivalent to new_prod; as soon as max_parses of
      them have been seen, new_prod is dropped.
   2) Among the productions with the same penalty an equivalent one is
      searched. If it exists, new_prod is dropped when an equal
      production is already on its chain of equivalents or when that
      chain already has max_parses members; otherwise new_prod is
      appended to the chain.
   3) Otherwise new_prod is linked in before the first production with
      a higher penalty. The rest of the list is then scanned, and the
      first equivalent production that exceeds the max_parses bound is
      unlinked and deleted together with its chain of equivalents.
*/
static void posmemo_add_sorted (PosMemo* pms, PosMemo new_prod)
{ PosMemo pred = NULL;
  PosMemo curr = *pms;
  long nr_parses = 0;

  /* Find the insertion point */
#ifdef DEBUG_POSMEMO
  abs_message ("Entering the posmemo sort with maxparses = %d", max_parses);
#endif
  /* First pass all parses that have a lower penalty than ours */
  while ((curr != NULL) && (curr -> penalty < new_prod -> penalty))
     { if (equivalent_posmemo (curr, new_prod))
	  { nr_parses++;
            if (nr_parses == max_parses)
	       { /* Enough better parses are known, forget the new one */
		 delete_posmemo (new_prod, "new");
	         return;
	       }
	  }
       pred = curr;
       curr = curr -> next;
     }

#ifdef DEBUG_POSMEMO
  /* nr_parses is a long, so it must be printed with %ld */
  abs_message ("Found %ld equivalent parses with lower penalty", nr_parses);
#endif 

  /* Either curr == NULL at this point or curr -> penalty >= new_prod -> penalty */
  /* Note also that nr_parses < max_parses */
  assert ((curr == NULL) || (curr -> penalty >= new_prod -> penalty));
  assert (nr_parses < max_parses);
  while ((curr != NULL) && (curr -> penalty == new_prod -> penalty))
     { /* Pass over all parses who share the penalty */
       /* If you encounter an equivalent one, we have at least an equal or ambiguous parse */
       if (equivalent_posmemo (curr, new_prod))
	  { PosMemo prime = curr;
	    int length = 1;
#ifdef DEBUG_POSMEMO
	    abs_message ("Trying to insert new posmemo %p into ambiguous prime node %p",
			 new_prod, prime);
#endif
	    /*
	       Iterate over the equivalent parses while checking if there is a
	       completely equal one; if so we can forget the new production.
	       From here on curr walks the equiv chain instead of the next chain;
	       on leaving the loop it designates the last member of the chain.
	    */
	    while (1)
	       { /* If you encounter an equal one, we're immediately done */
		 if (equal_posmemo (curr, new_prod))
		    {
#ifdef DEBUG_POSMEMO
		      abs_message("equal to %p: ", curr);
#endif
		      delete_posmemo (new_prod, "new equal");
		      return;
		    };

		 if (curr -> equiv == NULL) break;
		 curr = curr -> equiv;
		 length++;
	       };

	    /*
	       Limit the length of the equivalence list. Since there is
	       no criterion to decide which equivalent parse is "better"
	       (they all cover the same input), we only need the first
	       max_parses, since we are only going to print at most
	       max_parses combinations of equivalents.
	    */
	    if (length >= max_parses)
	       { delete_posmemo (new_prod, "new equivalent");
		 return;
	       };

	    /*
	       Add the new production to the end of the list of equivalents.
	       It shares the next pointer of the other members, and the
	       prime is marked by letting its prime pointer designate itself.
	    */
	    curr -> equiv = new_prod;
	    new_prod -> prime = prime;
	    new_prod -> next = curr -> next;
	    prime -> prime = prime;
	    return;
	  };

       pred = curr;
       curr = curr -> next;
     }

  /* Either curr == NULL at this point or curr -> penalty > new_prod -> penalty */
  /* Since nr_parses < max_parses still holds, we can insert the posmemo structure */
  assert((curr == NULL) || (curr -> penalty > new_prod -> penalty));
  assert((nr_parses < max_parses));
#ifdef DEBUG_POSMEMO
  abs_message ("Inserting new posmemo %p after %p before %p", new_prod, pred, curr);
#endif
  new_prod -> next = curr;

  /* The question here is if we still need the next pointers on equivalent parses */
  if (pred == NULL) *pms = new_prod;
  else
     { PosMemo equiv;	/* update all next pointers of the predecessor's equivalents */
       for (equiv = pred; equiv != NULL; equiv = equiv -> equiv)
          equiv -> next = new_prod;
     };

  /* We have definitely added another parse, so remember */
  pred = new_prod;
  nr_parses++;

  /* Scan the remainder of the posmemo list for parses that should be removed */
  while (curr != NULL)
     { if (equivalent_posmemo (curr, new_prod))
	  { /* another parse, check if we have to remove it */
	    nr_parses++;
	    if (nr_parses > max_parses)
	       { PosMemo equiv, next;
#ifdef DEBUG_POSMEMO
	         abs_message ("Deleting old equivalent posmemo %p after %p before %p",
			      curr, pred, curr -> next);
#endif
	         /* Update the next pointers of the predecessor node and its equivalent nodes. */
	         for (equiv = pred; equiv != NULL; equiv = equiv -> equiv)
		    equiv -> next = curr -> next;

		 /*
		    Naturally, if we delete the current production we should also
	            delete its equivalents. The equiv pointer is saved before
		    the node is released.
	         */
	         for (equiv = curr; equiv != NULL; equiv = next)
		    { next = equiv -> equiv;
		      delete_posmemo (equiv, "old equivalent");
		    };

		 /* We're done */
		 return;
	       };
	  };
       pred = curr;
       curr = curr -> next;
     }
}

/*
   Add a positive memo
*/
int pointsIntoStack(void *p);

/*
   Build a production record for nonterminal nont_nr at input_state and
   enter it into the posmemo list of that nonterminal.

   input_state  : state at which the nonterminal was recognized
   nont_nr      : number of the nonterminal
   penalty      : penalty of this parse
   nr_formals   : number of formals
   nr_locals    : number of locals
   nr_sons      : number of sons
   variables    : frame holding the values; variable i is read from
		  variables[-i], first the formals, then the locals,
		  then the sons
   target_state : state reached after this production (stored as
		  next_state)
   pass2        : stored as is in the production

   The values are copied into one block with the layout
      [formals][locals][sons][son type bitmap]
   in which bit i of the bitmap is set when son i is a transition
   entry rather than a production.

   The new record is handed to posmemo_add_sorted, which may decide to
   release it again.
   NOTE(review): posmemo_add_sorted dereferences the current list head,
   so the slot is presumably expected to have been set to blocked before
   the first production is added -- confirm against the callers.
*/
void posmemo_add_production (StateIndicator input_state, unsigned nont_nr,
                             long penalty, unsigned nr_formals, unsigned nr_locals,
			     unsigned nr_sons, LXCN_VALUE* variables,
			     StateIndicator target_state, void *pass2)
{ unsigned nr_sontype_words = (nr_sons + 31)/32;
  size_t variables_block_size = (nr_formals + nr_locals + nr_sons + nr_sontype_words) *
				sizeof (LXCN_VALUE);
  PosMemo *x = posmemo_fetch (input_state, nont_nr);
  PosMemo new_memo = posmemo_getmem (sizeof(PMPROD));
  unsigned *type_bits;
  unsigned i;

  new_memo -> nont_nr = nont_nr;
  new_memo -> nr_formals = nr_formals;
  new_memo -> nr_variables = nr_formals + nr_locals;
  new_memo -> nr_sons = nr_sons;

  /* Allocate and fill a copy of the formals, locals, pointers to posmemo's of sons
     or trellis transition entries and a bitmap indicating the nature of the sons.
  */
  if (variables_block_size > 0)
     { new_memo -> variables = (LXCN_VALUE *) posmemo_getmem (variables_block_size);

       /* Copy formals + locals */
       /* NOTE: variables 1 is at -1, 2 at -2, etc! */
       for (i = 0; i < nr_formals + nr_locals; i++)
          new_memo -> variables[i] = variables[-(int)i];

       /* Setup son typing admin: all sons start out as productions */
       type_bits = (unsigned *) &(new_memo -> variables[nr_formals + nr_locals + nr_sons]);
       for (i = 0; i < nr_sontype_words; i++) type_bits[i] = 0;

       /* copy sons by copying PMPROD entries or transition pointers from frame */
       for (i = 0; i < nr_sons; i++)
          { LXCN_VALUE son = variables[-(long)(nr_formals+nr_locals+i)];
            if (pointsIntoStack((LXCN_VALUE *)(son.text_par)))
	       /* This son is a syntax nonterminal, copy its PMPROD entry */
	       /* XXX wth is -7 ??? (offset within the son's frame, meaning not visible here) */
	       new_memo -> variables[nr_formals+nr_locals+i] = ((LXCN_VALUE *)(son.text_par))[-7];
            else
	       { /* copy the transition entry */
	         new_memo -> variables[nr_formals+nr_locals+i] = son;	
		 /* Unsigned mask: shifting a signed 1 into bit 31 is undefined */
	         type_bits[i/32] |= (1u << (i%32));
	       };
          };
     }
  else new_memo -> variables = NULL;

  new_memo -> next_state = target_state;
  new_memo -> penalty = penalty;
  new_memo -> failcont = NULL;
  new_memo -> pass2 = pass2;

  /* The list administration is filled in by posmemo_add_sorted */
  new_memo -> equiv = NULL;
  new_memo -> prime = NULL;
  new_memo -> next = NULL;

#ifdef DEBUG_POSMEMO
  abs_message ("Generating posmemo at input state: %p", input_state);
  posmemo_rdump_pmprod (new_memo, 0);
#endif
  posmemo_add_sorted(x, new_memo);
}

/*
   Release every production reachable from one posmemo slot and reset
   the slot to unknown. A slot that holds the unknown or the blocked
   marker is left untouched.

   The productions are visited by following the equiv pointer when it
   is set and the next pointer otherwise, so that the equivalents of a
   production are released before moving on to the next one.
*/
void posmemo_free_vec (PosMemo* entry)
{
  PosMemo prod;
  PosMemo following;

#ifdef DEBUG_POSMEMO
  abs_message ("Freeing entire posmemo vector");
#endif
  if ((*entry == POSMEMO_UNKNOWN) || (*entry == POSMEMO_BLOCKED))
    return;

  for (prod = *entry; prod != NULL; prod = following)
    {
      /* Pick the successor before the record is released */
      following = (prod -> equiv == NULL) ? prod -> next : prod -> equiv;
      if (prod -> variables != NULL)
	posmemo_freemem (prod -> variables);
      posmemo_freemem (prod);
    }
  *entry = POSMEMO_UNKNOWN;
}

/*
   Count the productions in the posmemo list of nonterminal nont_nr at
   input_state by following the next pointers; productions that hang
   on an equiv chain are not counted separately.

   Returns 0 for a slot that holds the unknown or the blocked marker:
   these markers are not productions and must not be dereferenced.
*/
int posmemo_count_prod (StateIndicator input_state, unsigned nont_nr)
{ int count = 0;
  PosMemo x = *(posmemo_fetch (input_state, nont_nr));

  if ((x == POSMEMO_UNKNOWN) || (x == POSMEMO_BLOCKED)) return (0);
  for (; x != NULL; x = x -> next) count++;
  return (count);
}

/*
   Store pc as the fail continuation of production prod.
   prod must not be null.
*/
void posmemo_set_failcont(PosMemo prod, void* pc)
{
  assert (prod);

  prod -> failcont = pc;
}

/*
   Yield the fail continuation stored in production prod.
   prod must not be null.
*/
void* posmemo_get_failcont(PosMemo prod)
{
  void *cont;

  assert (prod);
  cont = prod -> failcont;
  return cont;
}

/*
   Yield the pass2 value stored in production prod.
   prod must not be null.
*/
void* posmemo_get_pass2(PosMemo prod)
{
  void *second_pass;

  assert (prod);
  second_pass = prod -> pass2;
  return second_pass;
}

/*
   Yield the content of the posmemo slot of nonterminal nont_nr at
   input_state: the first production of the list, or one of the
   special markers.
*/
PosMemo posmemo_get_prod_ptr(StateIndicator input_state, unsigned nont_nr)
{
  PosMemo *slot = posmemo_fetch (input_state, nont_nr);

  return *slot;
}

/*
   Yield the address of the value block of production state; the
   formals are stored at its start. state must not be null.
*/
void* posmemo_get_formal_ptr(PosMemo state)
{
  void *values;

  assert (state);
  values = state -> variables;
  return values;
}

/*
   Yield variable number nr (counted from 0 over the formals followed
   by the locals) of production state. Both the production and the
   index are checked with assertions.
*/
LXCN_VALUE posmemo_get_variable(PosMemo state,int nr)
{
  LXCN_VALUE *values;

  assert (state);
  assert (nr < state -> nr_variables);
  values = state -> variables;
  return values[nr];
}

/*
   Yield local number nr (counted from 0) of production state. The
   locals are stored behind the formals, so the index is offset by the
   number of formals. Both the production and the resulting index are
   checked with assertions.
*/
LXCN_VALUE posmemo_get_local(PosMemo state,int nr)
{
  LXCN_VALUE *values;

  assert (state);
  assert (state -> nr_formals+nr < state -> nr_variables);
  values = state -> variables;
  return values[state -> nr_formals+nr];
}

/*
   Yield son number nr (counted from 0) of production state, taken
   from the son slots that follow the variables and converted to a
   production pointer. Whether the son really is a production or a
   lexical transition is not checked here.
*/
PosMemo posmemo_get_son (PosMemo state,int nr)
{
  LXCN_VALUE *values;

  assert (state);
  assert (nr < state -> nr_sons);
  values = state -> variables;
  return ((PosMemo)(values[state -> nr_variables+nr].text_par));
}

/*
   Yield the penalty of production state. state must not be null.
*/
long posmemo_get_penalty(PosMemo state)
{
  long pen;

  assert (state);
  pen = state -> penalty;
  return pen;
}

/*
   Yield the production that follows curr on the next chain.
   curr must not be null.
*/
PosMemo posmemo_get_next_prod(PosMemo curr)
{
  PosMemo successor;

  assert (curr);
  successor = curr -> next;
  return successor;
}

/*
   Yield the next_state of production curr, i.e. the state that was
   stored as target state when the production was added.
   curr must not be null.
*/
StateIndicator posmemo_get_input_state(PosMemo curr)
{
  StateIndicator target;

  assert (curr);
  target = curr -> next_state;
  return target;
}

/*
   Write a flat dump of a list of productions, following the next
   pointers. For each production a summary line is written, followed
   by one line for each of its variables (formals and locals alike,
   although the line is labelled "formal").
*/
void posmemo_dump_pmprod_list (PMPROD* pmprod)
{
  PMPROD *prod;

  for (prod = pmprod; prod != NULL; prod = prod -> next)
    {
      unsigned var_ix = 0;

      abs_message ("nont_nr = %u, pen = %ld, nr_formals = %u, next_state = %u, failcont = %p",
		   prod -> nont_nr, prod -> penalty, prod -> nr_formals,
		   prod -> next_state -> pos, prod -> failcont);
      while (var_ix < prod -> nr_variables)
	{
	  abs_message ("\tformal %d set value: %lu", var_ix, (prod -> variables)[var_ix].set_par);
	  var_ix++;
	}
    }
}

/*
   Dump all posmemos of a trellis: first every production individually
   (through posmemo_dump_pmprod), then a compact table with a row per
   nonterminal and a column per trellis position.

   The table is built in overview[node][rule], which holds
      -1  when the posmemo is unknown   (printed as 'u')
      -2  when the posmemo is blocked   (printed as 'b')
      >=0 the number of productions found by following the next chain
   overview[node] is NULL for positions without a state or without a
   posmemo vector. Nonterminal 0 is never inspected.

   To keep the table small, rows and columns are suppressed when all
   of their entries are unknown or zero.

   All work space allocated here is released again before returning.
*/
void posmemo_dump_table (Trellis* trellis)
{
    StateNode** state_row = GET_TRELLIS_STATE_ROW(trellis);
    int node_nr;
    int rule_nr;
    int *empty_rule;
    int *empty_node;
    int **overview = (int**) abs_calloc (trellis -> length, sizeof(int*),
					 "posmemo_dump_table: overview[]");

    /* Build the table: */
    for(node_nr = 0; node_nr < trellis->length; node_nr++) {
        StateNode* state = *state_row++;

        if (!state) 
           overview[node_nr] = NULL;
        else {
            PosMemo* pma = state->pos_memos;

            if(!pma) {
                overview[node_nr] = NULL;
		printf("\t\t(posmemo %u skipped, NULL)\n", state->pos);
            } else {
                overview[node_nr] = (int*) abs_calloc (get_nr_syntax_nonterminals(), sizeof (int),
				                       "posmemo_dump_table: overview[][]");

                for(rule_nr = 1; rule_nr < get_nr_syntax_nonterminals(); rule_nr++) {
                    if(posmemo_is_blocked(state, rule_nr)) {
                        overview[node_nr][rule_nr] = -2;
                    } else {
                        if(posmemo_is_unknown(state, rule_nr)) {
                            overview[node_nr][rule_nr] = -1;
                        } else {
                            /* A real list: dump and count its productions */
                            PosMemo plijst = posmemo_get_prod_ptr(state, rule_nr);
                            int nr_ptrs = 0;

                            while(plijst) {
    				abs_printf ("%d: ", node_nr + 1);	/* Match trellis dump */
                                posmemo_dump_pmprod(plijst);
                                nr_ptrs++;
                                plijst = plijst->next;
                            }

                            overview[node_nr][rule_nr] = nr_ptrs;
                        }
                    }
                }
            }
        }
    }

    /* printed table compression */
    /* A rule is empty unless some position has a blocked or non empty posmemo for it */
    empty_rule = (int*) abs_calloc (get_nr_syntax_nonterminals(), sizeof(int),
				    "posmemo_dump_table: empty_rule");
    for (rule_nr = 1; rule_nr < get_nr_syntax_nonterminals(); rule_nr++) {
        empty_rule[rule_nr] = 1;
        node_nr = 0;

        while ((node_nr < trellis->length) && (empty_rule[rule_nr])) {
            if (overview[node_nr]) {
                switch (overview[node_nr][rule_nr]) {
                    case -1:
                        break;
                    case -2:
                        empty_rule[rule_nr] = 0;
                        break;
                    default:
                        empty_rule[rule_nr] = !overview[node_nr][rule_nr];
                }
            }

            node_nr++;
        }
    }
    /* Likewise a position is empty unless it has a blocked or non empty posmemo;
       positions without an overview row always stay empty */
    empty_node = (int*) abs_calloc (trellis->length, sizeof(int), "posmemo_dump_table: empty_node");
    for (node_nr = 0; node_nr < trellis->length; node_nr++) {
        empty_node[node_nr] = 1;
        rule_nr = 1;

        while ((rule_nr < get_nr_syntax_nonterminals())
               && (empty_node[node_nr])
               && (overview[node_nr])) {
            switch (overview[node_nr][rule_nr]) {
                case -1:
                    break;
                case -2:
                    empty_node[node_nr] = 0;
                    break;
                default:
                    empty_node[node_nr] = !overview[node_nr][rule_nr];
            }

            rule_nr++;
        }
    }

    /* actually show it: */
    /* first the table */
    for (rule_nr = 1; rule_nr < get_nr_syntax_nonterminals(); rule_nr++) {
        if (!empty_rule[rule_nr]) {
            abs_printf("%3d|", rule_nr);

            for (node_nr = 0; node_nr < trellis->length; node_nr++) {
                /* A non empty position always has an overview row */
                if (!empty_node[node_nr]) {
                    switch (overview[node_nr][rule_nr]) {
                        case -1:
                            abs_printf("   u");
                            break;
                        case -2:
                            abs_printf("   b");
                            break;
                        default:
                            abs_printf(" %3d", overview[node_nr][rule_nr]);
                    }
                }
            }

            if (nonterm_names[rule_nr]) abs_message (" | %s", nonterm_names[rule_nr]);
            else abs_message (" | ?");
        }
    }
    /* then a neat line below it */
    abs_printf("---+");
    for (node_nr = 0; node_nr < trellis->length; node_nr++) {
        if (!empty_node[node_nr]) {
            abs_printf("----");
        }
    }
    /* and of course the numbers */
    abs_printf("\n   |");
    for (node_nr = 0; node_nr < trellis->length; node_nr++) {
        if (!empty_node[node_nr]) {
            abs_printf(" %3d", node_nr);
        }
    }
    abs_printf("\n");

    /* free the space: */
    for (node_nr = 0; node_nr < trellis->length; node_nr++) {
        if (overview[node_nr]) {
            abs_free (overview[node_nr], "posmemo_dump_table: overview[][]");
        }
    }
    abs_free (overview, "posmemo_dump_table: overview[]");
    abs_free (empty_rule, "posmemo_dump_table: empty_rule");
    /* empty_node was allocated above as well and must not be leaked */
    abs_free (empty_node, "posmemo_dump_table: empty_node");
}
