/* context.c - Context functions.
   Copyright (C) 2001-2015 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Written by Marc Tardif <marc@interunion.ca>.  */

/* The context algorithm in this program can be summarized with the
   following illustration:

   str             fpos                   max bpos
    v               v                       v v
    +------------------------------------------------------------+
                               opt.context

   The str address points to the beginning of the memory location where
   the context for a pattern is manipulated.  The size of this segment
   is kept in the opt.context global variable which is usually set to
   DEFAULT_CONTEXT.  The actual memory allocated is increased by one
   byte to account for padding as described below.

   When a keyword is found in the input, the context preceding the current
   position is copied backwards into the str buffer.  Copying starts at
   str + opt.context + 1 and continues, reading from the input buffer,
   until either a paragraph delimiter or the beginning of the str buffer
   is reached.  The resulting offset from str is stored in the bpos
   variable.

   The maximum forward context is then set according to the number of
   characters read backwards.  The object is to retain as much context as
   possible while keeping the keyword as centered as possible.  Therefore,
   max is set to opt.context / 2 or bpos depending on which is greater.
   Characters are then copied from the input buffer starting at str until
   a paragraph delimiter is reached or max.  If the end of the current
   input buffer is reached prematurely, new data is read into the
   input buffer and the remaining context is processed.  If the last
   copied character is a space, fpos is set to the previous offset,
   otherwise it is set to the current offset.  This explains the need for
   the padding byte in opt.context, otherwise a byte from the backward
   context could be overwritten which would yield a context of one byte
   less than opt.context.

   After fpos has been determined, the value of bpos must be re-
   evaluated.  If fpos is greater than bpos, the latter is set to
   fpos + 1, otherwise it remains unchanged.  The actual context
   then consists of two fragments:
   - The first half is contained between str + bpos and str +
     opt.context.
   - The second half is located at the beginning of the str
     buffer and extends for fpos bytes.

   If a pattern consists of multiple keywords, when using the NEAR
   operator, the last two keyword matches are merged to extract as much
   relevant context as possible.  The previous match is stored in the
   line buffer for the current count.  The current match is stored in the
   tmp buffer.  To merge these buffers, the distance in number of bytes
   between each match is evaluated against the size of opt.context.
   If the distance is greater, two halves are concatenated with the
   sep_context string.  If smaller, the common segments of each buffer
   are centered to provide as much context as possible on the left and
   right of each keyword.
*/

#include <string.h>
#include <stdlib.h>
#include <stdio.h>

#include "context.h"
#include "expr.h"
#include "mem.h"
#include "options.h"

#include "event.h"
#include "html.h"
#include "text.h"

/* State-stack helpers operating on context->state.  PUSH stores V in the
   current slot and then advances the pointer; POP steps the pointer back
   and yields the value found there.  Neither macro checks bounds.  */
#define POP()   (*--context->state)
#define PUSH(v) (*context->state++ = (v))

/* Reserved values kept on the state stack.  Both are negative, so the
   value following SAVE is zero; context_find hands every state other
   than these two to line_get as its keyword argument.  */
enum state
{
  SRCH = -2, /* search buffer */
  SAVE = -1  /* save buffer   */
};

/* Global search context.  Allocated in context_init and used throughout
   this file for the position counters, the state stack, the scratch
   line buffer and the per-pattern line buffers.  */
struct context *context;

/* separator strings */
/* sep_context is inserted by line_merge between the two halves of a
   merged context; sep_field is printed after each prefix field by
   line_print and context_print.  */
static char sep_context[] = "... ";
static char sep_field[]   = ":";

/* pointer to text or html functions */
/* Both are assigned in context_prep according to the input type.
   line_get treats a non-null result from go_forward as "stop and return
   to the caller"; context_find uses the result of go_find as its next
   state.  */
static unsigned char * (*go_forward) (char *, char *);
static int (*go_find) (char **, char **, char *);

/* Set up the global search context for the pattern expression STR.

   Initializes the allocator, allocates the context structure, hands STR
   to expr_init and allocates the scratch line buffer.  Unless quiet
   output is selected, an array of line buffers is also allocated for
   each pattern, holding opt.occurrences entries (or a single entry when
   opt.occurrences is zero).

   Returns the pattern count reported by expr_init.  When that count is
   zero the function returns immediately, before any line buffer is
   allocated.  */
int
context_init (char *str)
{
  size_t line_size;
  struct line **slots;
  int per_pattern, p, k;

  mem_init ();
  context = mem_alloc (sizeof (struct context));
  context->num_pat = expr_init (str);
  if (context->num_pat == 0)
    return 0;

  /* Every line buffer has the same size: struct line minus a 4-byte
     member (presumably the placeholder for its text array -- confirm in
     context.h) plus opt.context bytes and the one padding byte described
     at the top of this file.  */
  line_size = sizeof (struct line) - sizeof (char[4]) + opt.context + 1;

  context->tmp = mem_alloc (line_size);

  /* Quiet mode needs no stored lines.  */
  if (opt.out_quiet)
    return context->num_pat;

  context->lines = mem_alloc (context->num_pat * sizeof (struct line **));
  per_pattern = opt.occurrences ? opt.occurrences : 1;

  for (p = 0; p < context->num_pat; p++)
    {
      slots = mem_alloc (per_pattern * sizeof (struct line *));
      for (k = 0; k < per_pattern; k++)
        slots[k] = mem_alloc (line_size);

      context->lines[p] = slots;
    }

  return context->num_pat;
}

/* Reset the global context before scanning a new input of kind TYPE.

   Byte and word counters restart at zero, line and paragraph counters at
   one.  The pattern, forward and state stacks are rewound to their
   bases, the forward table entry is cleared and SRCH is pushed as the
   initial state.  The first pattern-table entry and the go_forward and
   go_find pointers are then set to the HTML or the text implementation,
   and expr_prep is called last.  */
void
context_prep (enum extension type)
{
  context->num_lines = 1;
  context->num_paragraphs = 1;
  context->num_bytes = 0;
  context->num_words = 0;

  context->state = context->sstack;
  PUSH (SRCH);
  context->forward_table = context->fstack;
  *context->forward_table = NULL;
  context->pattern_table = context->pstack;

  if (type != HTML)
    {
      /* For SOURCE input a newline is given the NP event, for any other
         text input the NL event.  */
      if (type == SOURCE)
        event['\n'] = NP;
      else
        event['\n'] = NL;

      go_find = text_find;
      go_forward = text_forward;
      *context->pattern_table = text_first;
    }
  else
    {
      go_find = html_find;
      go_forward = html_forward;
      *context->pattern_table = html_first;
    }

  expr_prep ();
}

/* Merge the context of the current match SRC into DEST, which holds the
   context stored for the previous match.

   DEST is read in the linear layout written by line_copy: dest->bpos
   bytes of backward context followed by dest->fpos bytes of forward
   context.  SRC is read in the split layout of the scratch buffer: its
   backward context ends at src->str + opt.context + 1 and its forward
   context is the first src->fpos bytes of src->str.  DISTANCE is the
   byte distance between the two matches as computed by the caller.

   On return dest->str is NUL-terminated, with a single trailing space
   dropped if there is one.  dest->bpos and dest->fpos are left
   untouched.  */
static void
line_merge (struct line *dest, struct line *src, int distance)
{
  register int dest_len, src_len;

  /* Span from the start of DEST's backward context to the end of SRC's
     forward context.  NOTE(review): when this already fits in
     opt.context nothing is copied from SRC and the string is simply cut
     at this length -- confirm that DEST is guaranteed to hold those
     bytes in that case.  */
  dest_len = dest->bpos + distance + src->fpos;
  if (dest_len > opt.context)
    {
      if (distance > opt.context - 5)
        {
          /* The matches are too far apart to share one window: keep a
             piece around each match and join them with sep_context.  */
          register int half;

          /* Room for each piece once the separator text (sizeof minus
             the terminating NUL) has been accounted for.  */
          half = (opt.context - sizeof sep_context + 1) / 2;

          /* Offset into DEST that leaves at most half / 2 bytes of its
             backward context in front of the first match.  */
          src_len = dest->bpos - MIN (half / 2, dest->bpos);
          if (src_len)
            memmove (dest->str, dest->str + src_len, half);
          dest_len = half;

          memcpy (dest->str + dest_len,
              sep_context, sizeof sep_context - 1);
          dest_len += sizeof sep_context - 1;

          /* Backward part of the second piece, taken from the tail of
             SRC's backward context.  */
          src_len = half - MIN (half / 2, src->fpos);
          memcpy (dest->str + dest_len,
              src->str + opt.context + 1 - src_len, src_len);
          dest_len += src_len;

          /* Forward part of the second piece fills what is left of
             half.  */
          if (src->fpos)
            {
              memcpy (dest->str + dest_len, src->str,
                  half - src_len);
              dest_len += half - src_len;
            }
        }
      else
        {
          /* The matches are close enough to share one window.  Start
             with half of the room not taken by the gap as the budget
             for DEST's backward context, and enlarge it by whatever
             SRC's forward context leaves unused.  */
          dest_len = (opt.context - distance + 1) / 2;
          if (dest_len > src->fpos)
            dest_len += dest_len - src->fpos;

          /* Drop backward context beyond the budget by shifting DEST
             left; otherwise all of it is kept.  */
          if (dest_len < dest->bpos)
            memmove (dest->str, dest->str + dest->bpos - dest_len,
                dest_len + dest->fpos);
          else
            dest_len = dest->bpos;

          /* dest_len now marks the end of DEST's own text.  */
          dest_len += dest->fpos;
          if (dest->fpos < distance)
            {
              /* DEST's forward context stops short of the second match:
                 take the missing bytes from the tail of SRC's backward
                 context, then as much of SRC's forward context as still
                 fits.  NOTE(review): the reason DISTANCE is rounded down
                 to an even value here is not visible in this file.  */
              src_len = (distance & ~1) - dest->fpos;
              memcpy (dest->str + dest_len,
                  src->str + opt.context + 1 - src_len, src_len);
              dest_len += src_len;

              src_len = MIN (opt.context - dest_len, src->fpos);
              if (src_len > 0)
                {
                  memcpy (dest->str + dest_len, src->str, src_len);
                  dest_len += src_len;
                }
            }
          else
            {
              /* DEST's forward context already reaches the second match:
                 append only the part of SRC's forward context that lies
                 beyond it, limited to the remaining room.  */
              src_len = MIN (opt.context - dest_len,
                  (distance & ~1) + src->fpos - dest->fpos);
              if (src_len > 0)
                {
                  memcpy (dest->str + dest_len,
                      src->str + dest->fpos - distance, src_len);
                  dest_len += src_len;
                }
            }
        }
    }

  /* Trim one trailing space.  The length check keeps an empty result
     from reading, and then writing, the byte in front of the buffer.  */
  if (dest_len > 0 && dest->str[dest_len - 1] == ' ')
    dest_len--;
  dest->str[dest_len] = '\0';
}

/* Copy the context held in SRC into DEST as one contiguous,
   NUL-terminated string.

   SRC keeps its backward context from src->str + src->bpos up to
   src->str + opt.context + 1 and its forward context in the first
   src->fpos bytes of src->str.  DEST receives the backward part first
   and the forward part right after it; dest->bpos is set to the length
   of the backward part and dest->fpos to src->fpos.  */
static void
line_copy (struct line *dest, struct line *src)
{
  int back_len;

  back_len = opt.context + 1 - src->bpos;

  /* Backward context first ...  */
  memcpy (dest->str, src->str + src->bpos, back_len);
  /* ... then the forward context and the terminator.  */
  memcpy (dest->str + back_len, src->str, src->fpos);
  dest->str[back_len + src->fpos] = '\0';

  dest->bpos = back_len;
  dest->fpos = src->fpos;
}

/* Write one result line for LINE to standard output.

   Depending on the options, the context text is preceded by the file
   name, the pattern PAT (printed through expr_print), the line number
   and the byte offset, in that order, each followed by sep_field.  The
   context text itself is followed by a newline.  */
static void
line_print (struct line *line, int pat)
{
  if (opt.with_filename)
    {
      printf ("%s%s", opt.filename, sep_field);
    }

  if (opt.with_pattern)
    {
      expr_print (pat);
      printf ("%s", sep_field);
    }

  if (opt.out_line)
    {
      printf ("%d%s", line->line, sep_field);
    }

  if (opt.out_byte)
    {
      printf ("%d%s", line->byte, sep_field);
    }

  printf ("%s\n", line->str);
}

/* Store (and, when occurrences are unlimited, print) the context for
   each pattern that expr_next reports for keyword KW at the current
   position.

   POS is handed to text_backward; BUF and LIM are handed to go_forward.
   The context is gathered in the scratch buffer context->tmp and then
   copied or merged into the line buffer selected for the pattern.

   If go_forward returns non-null, the pattern and its near value are
   pushed on the state stack and the pointer obtained from text_backward
   is returned.  The next call finds *context->forward_table set, calls
   go_forward again, pops the saved values and resumes at the `mid'
   label.  Once expr_next reports no further pattern the forward table
   entry is cleared and NULL is returned.  */
static char *
line_get (int kw, char *pos, char *buf, char *lim)
{
  struct line *tmp;
  int count, near, pat;

  tmp = context->tmp;
  if (*context->forward_table)
    {
      /* Resuming an interrupted forward scan.  The result of this
         go_forward call is not checked.  */
      go_forward (buf, lim);
      near = POP ();
      pat = POP ();
      count = expr_count (pat);
      if (near >= 0)
        count--;

      goto mid;
    }

  tmp->bpos = tmp->fpos = 0;
  while ((pat = expr_next (kw, context->num_bytes, context->num_words,
      context->num_paragraphs)) >= 0)
    {
      /* TODO (from the original author): find a way to merge the two
         and avoid pushing near.  */
      near = expr_near (kw, pat);
      count = expr_count (pat);
      if (near >= 0)
        count--;

      if (!opt.out_quiet
          && (!opt.occurrences || count < (int)opt.occurrences))
        {
          struct line *line;

          /* The scratch buffer is filled only once per call; a zero
             bpos means it has not been filled yet.  */
          if (!tmp->bpos)
            {
              char *beg;

              beg = text_backward (pos);
              if (go_forward (buf, lim))
                {
                  PUSH (pat);
                  PUSH (near);
                  return beg;
                }
 mid:
              /* Keep the backward context from overlapping the forward
                 context.  */
              if (tmp->bpos < tmp->fpos)
                tmp->bpos = tmp->fpos;
              /* Drop a trailing space from the forward context.  The
                 fpos check prevents reading the byte in front of the
                 buffer, and fpos going negative, when no forward
                 context was collected.  */
              if (tmp->fpos > 0 && tmp->str[tmp->fpos - 1] == ' ')
                tmp->fpos--;
              /* Skip a leading space of the backward context, and keep
                 bpos strictly above fpos.  */
              if (tmp->str[tmp->bpos] == ' ' || tmp->bpos == tmp->fpos)
                tmp->bpos++;
            }

          /* With unlimited occurrences a single line buffer per pattern
             is reused.  */
          if (!opt.occurrences)
            count = 0;

          line = context->lines[pat][count];

          if (near > 0)
            line_merge (line, tmp, context->num_bytes - line->byte);
          else
            line_copy (line, tmp);

          line->byte = context->num_bytes;
          line->line = context->num_lines;

          if (!opt.occurrences && near >= 0)
            line_print (line, pat);
        }
      /* NOTE(review): this value is never read; near is reassigned at
         the top of the next iteration and is not used after the
         loop.  */
      near = context->num_words;
    }

  *context->forward_table = NULL;
  return NULL;
}

/* Advance the search through the input, resuming from the state left on
   the state stack by the previous call.

   A popped state other than SRCH is first handed to line_get, which
   resumes the context it was collecting.  *BUF is then set to *POS and
   the state loop runs:
     SRCH   calls go_find and uses its result as the next state;
     SAVE   picks the start of the data to keep (text_backward of *POS,
            or *POS itself in quiet mode) and stops;
     other  is passed to line_get as a keyword; a non-null result stops
            the loop, otherwise searching continues.
   The state to resume from is pushed back before returning.

   Returns the difference between *POS and the chosen start pointer.  */
int
context_find (char **pos, char **buf, char *lim)
{
  int cur;
  char *start;

  cur = POP ();
  if (cur != SRCH)
    {
      line_get (cur, *pos, *buf, lim);
      cur = SRCH;
    }

  *buf = *pos;
  for (;;)
    {
      if (cur == SRCH)
        {
          cur = go_find (pos, buf, lim);
          continue;
        }

      if (cur == SAVE)
        {
          if (opt.out_quiet)
            start = *pos;
          else
            start = text_backward (*pos);
          cur = SRCH;
          break;
        }

      /* Any other state is a keyword for line_get.  */
      start = line_get (cur, *pos, *buf, lim);
      if (start)
        break;
      cur = SRCH;
    }

  PUSH (cur);
  return *pos - start;
}

/* Print the results for the current input.

   When expr_eval reports no match, the file name is printed only if
   opt.list_files is 1, and 0 is returned.  Otherwise 1 is returned
   after printing:
     - in quiet mode, either the count for every pattern (with the
       optional file name and pattern prefixes) when opt.count_matches
       is set, or the file name when opt.list_files is 2;
     - in normal mode with opt.occurrences set, the stored lines of each
       pattern, limited to opt.occurrences and to the pattern's count.
   Nothing is printed here in normal mode when opt.occurrences is
   zero.  */
int
context_print (void)
{
  int pat, shown, remaining;

  if (!expr_eval ())
    {
      if (opt.list_files == 1)
        printf ("%s\n", opt.filename);
      return 0;
    }

  if (opt.out_quiet)
    {
      if (opt.count_matches)
        {
          for (pat = 0; pat < context->num_pat; pat++)
            {
              if (opt.with_filename)
                printf ("%s%s", opt.filename, sep_field);
              if (opt.with_pattern)
                {
                  expr_print (pat);
                  printf ("%s", sep_field);
                }
              printf ("%d\n", expr_count (pat));
            }
        }
      else if (opt.list_files == 2)
        {
          printf ("%s\n", opt.filename);
        }
      return 1;
    }

  if (!opt.occurrences)
    return 1;

  for (pat = 0; pat < context->num_pat; pat++)
    {
      remaining = expr_count (pat);
      for (shown = 0; shown < opt.occurrences; shown++)
        {
          /* Stop once every counted match of this pattern is shown.  */
          if (remaining == 0)
            break;
          remaining--;
          line_print (context->lines[pat][shown], pat);
        }
    }

  return 1;
}

/* Tear down the search context by calling mem_free.
   NOTE(review): presumably this releases every block obtained through
   mem_alloc, including the global context itself -- confirm in mem.c.  */
void
context_free (void)
{
  mem_free ();
}