/*  GNU SED, a batch stream editor.
    Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1998, 1999
    Free Software Foundation, Inc.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */

#undef EXPERIMENTAL_DASH_N_OPTIMIZATION	/*don't use -- is very buggy*/
#define INITIAL_BUFFER_SIZE	50
#define FREAD_BUFFER_SIZE	8192

#include "config.h"
#include <stdio.h>
#include <ctype.h>

#include <errno.h>
#ifndef errno
extern int errno;
#endif

#ifdef HAVE_ISATTY
# ifdef HAVE_UNISTD_H
#  include <unistd.h>
# endif
#endif

#ifdef __GNUC__
# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7)
   /* silence warning about unused parameter even for "gcc -W -Wunused" */
#  define UNUSED	__attribute__((unused))
# endif
#endif
#ifndef UNUSED
# define UNUSED
#endif

#ifndef HAVE_STRING_H
# include <strings.h>
# ifdef HAVE_MEMORY_H
#  include <memory.h>
# endif
#else
# include <string.h>
#endif /*HAVE_STRING_H*/

#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif

#ifdef HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif

#include "regex-sed.h"
#include "basicdefs.h"
#include "utils.h"
#include "sed.h"

/* If set, buffer input as minimally as practical,
   and fflush(stdout) more often. */
extern flagT force_unbuffered;

/* If set, don't write out the line unless explicitly told to. */
extern flagT no_default_output;

/* Do we need to be pedantically POSIX compliant? */
extern flagT POSIXLY_CORRECT;

/* How long should the `l' command's output line be? */
extern countT lcmd_out_line_len;


/* Sed operates a line at a time.  A `struct line' is a growable,
   length-counted byte buffer: str_append() copies raw bytes into
   `text' and records how many in `length'; it adds no terminator. */
struct line {
  char *text;		/* Pointer to line allocated by malloc. */
  char *active;		/* Pointer to non-consumed part of text.
			   (In this file only the look-ahead `buffer'
			   makes use of it.) */
  size_t length;	/* Length of text (or active, if used). */
  size_t alloc;		/* Allocated space for text. */
  flagT chomped;	/* Was a trailing newline dropped?  This is what
			   gets passed to output_line() as its `nl'
			   argument when the line is written out. */
};

/* A queue of text to write out at the end of a cycle
   (filled by the "a" and "r" commands.)  Each node carries either
   literal text or a file name; dump_append_queue() handles both.
   The strings are borrowed from the compiled command:
   release_append_queue() frees only the nodes themselves. */
struct append_queue {
  const char *rfile;	/* if non-NULL, name of a file to copy to stdout */
  const char *text;	/* if non-NULL, literal text to write to stdout */
  size_t textlen;	/* number of bytes at `text' */
  struct append_queue *next;	/* next queued item, or NULL at the tail */
};

/* State information for the input stream.  One of these is threaded
   through all of the reading and address-matching routines below. */
struct input {
  char **file_list;	/* The list of yet-to-be-opened files.
			   It is invalid for file_list to be NULL.
			   When *file_list is NULL we are
			   currently processing the last file. */
  countT bad_count;	/* count of files we failed to open */
  countT line_number;	/* current input line number (over all files) */

  flagT (*read_fn) P_((struct input *));	/* read one line */
  /* If fp is NULL, read_fn better not be one which uses fp;
     in particular, read_always_fail() is recommended.
     The other values used in this file are read_file_line()
     (stdio input) and read_mem_line() (mapped input). */

  FILE *fp;		/* if NULL, none of the following are valid */
  VOID *base;		/* if non-NULL, we are using mmap()ed input */
  const char *cur;	/* only valid if base is non-NULL */
  size_t length;	/* only valid if base is non-NULL */
  size_t left;		/* only valid if base is non-NULL */
  flagT no_buffering;	/* only valid if base is NULL */
};


/* Have we done any replacements lately?  This is used by the `t' command.
   It is set by do_subst() and cleared both when `t' takes its branch
   and whenever read_pattern_space() fetches new input. */
static flagT replaced = 0;

/* The `current' input line (the pattern space). */
static struct line line;

/* An input line that's been stored for later use by the program
   (the hold space; see the g, G, h, H and x commands). */
static struct line hold;

/* The buffered input look-ahead.  The only field that should be
   used outside of read_mem_line() or line_init() is buffer.length. */
static struct line buffer;

/* Head and tail of the pending "a"/"r" output queue; both are NULL
   when the queue is empty. */
static struct append_queue *append_head = NULL;
static struct append_queue *append_tail = NULL;


#ifdef BOOTSTRAP
/* We can't be sure that the system we're bootstrapping on has
   memchr(), and ../lib/memchr.c requires configuration knowledge
   about how many bits are in a `long'.  This implementation
   is far from ideal, but it should get us up-and-limping well
   enough to run the configure script, which is all that matters.
*/
# ifdef memchr
#  undef memchr
# endif
# define memchr bootstrap_memchr

static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n));
static VOID *
bootstrap_memchr(s, c, n)
  const VOID *s;
  int c;
  size_t n;
{
  char *p;

  for (p=(char *)s; n-- > 0; ++p)
    if (*p == c)
      return p;
  return CAST(VOID *)0;
}
#endif /*BOOTSTRAP*/

/* Grow the storage of `lb' so that it can hold at least `len' bytes.
   The allocation is doubled (and never drops below
   INITIAL_BUFFER_SIZE) so that repeated appends do not cause a
   realloc() call each time.  lb->length is left untouched. */
static void resize_line P_((struct line *, size_t));
static void
resize_line(lb, len)
  struct line *lb;
  size_t len;
{
  size_t wanted = lb->alloc * 2;

  if (wanted < len)
    wanted = len;
  if (wanted < INITIAL_BUFFER_SIZE)
    wanted = INITIAL_BUFFER_SIZE;

  lb->alloc = wanted;
  lb->text = REALLOC(lb->text, lb->alloc, char);
}

/* Copy `length' bytes starting at `string' onto the end of the line
   `to', enlarging its storage first if they would not fit. */
static void str_append P_((struct line *, const char *, size_t));
static void
str_append(to, string, length)
  struct line *to;
  const char *string;
  size_t length;
{
  size_t old_length = to->length;
  size_t total = old_length + length;

  if (total > to->alloc)
    resize_line(to, total);
  MEMCPY(to->text + old_length, string, length);
  to->length = total;
}

/* Set up `buf' as an empty line with `initial_size' bytes of
   freshly allocated storage.  Any previous contents of *buf are
   simply overwritten, not freed. */
static void line_init P_((struct line *, size_t initial_size));
static void
line_init(buf, initial_size)
  struct line *buf;
  size_t initial_size;
{
  buf->length = 0;
  buf->chomped = 1;
  buf->active = NULL;
  buf->alloc = initial_size;
  buf->text = MALLOC(initial_size, char);
}

/* Make the line `to' a copy of the line `from' (text, length and
   chomped flag).  Whatever `to' held before is lost. */
static void line_copy P_((struct line *from, struct line *to));
static void
line_copy(from, to)
  struct line *from;
  struct line *to;
{
  size_t need = from->length;

  if (to->alloc < need)
    {
      size_t grown = to->alloc * 2;

      if (grown < need)
	grown = need;
      if (grown < INITIAL_BUFFER_SIZE)
	grown = INITIAL_BUFFER_SIZE;
      to->alloc = grown;

      /* The old text is about to be overwritten anyway, so a
	 FREE()+MALLOC() pair is cheaper than a REALLOC(), which
	 would copy it over to the new storage. */
      FREE(to->text);
      to->text = MALLOC(to->alloc, char);
    }

  MEMCPY(to->text, from->text, need);
  to->length = need;
  to->chomped = from->chomped;
}

/* Add a newline followed by the text of `from' to the end of `to';
   `to' also takes over the chomped flag of `from'. */
static void line_append P_((struct line *from, struct line *to));
static void
line_append(from, to)
  struct line *from;
  struct line *to;
{
  static const char separator[] = "\n";

  str_append(to, separator, 1);
  str_append(to, from->text, from->length);
  to->chomped = from->chomped;
}

/* Exchange the contents of two "struct line" buffers. */
static void line_exchange P_((struct line *, struct line *));
static void
line_exchange(a, b)
  struct line *a;
  struct line *b;
{
  struct line t;

  MEMCPY(&t,  a, sizeof(struct line));
  MEMCPY( a,  b, sizeof(struct line));
  MEMCPY( b, &t, sizeof(struct line));
}

/* On systems that define O_BINARY, drop one trailing character CH
   (a CR, or a ^Z at end of file) from the line LBUF, provided it
   belongs to the text that begins at offset FROM.  Without O_BINARY
   this does nothing at all. */
static void line_undosify P_((struct line *, size_t, int));
static void __inline__
line_undosify(lbuf, from, ch)
  struct line *lbuf;
  size_t from;
  int ch;	/* was undeclared, i.e. relied on K&R implicit int */
{
#ifdef O_BINARY
  size_t lbuf_len = lbuf->length;

  /* Nothing to strip from an empty line.  Without this test an
     empty line (lbuf_len == from == 0) with ch == '\r' would
     inspect lbuf->text[-1]. */
  if (lbuf_len == 0)
    return;

  /* Remove character CH from the end of the line starting at offset FROM.  */
  if ((lbuf_len > from
       /* This is for the case where CR was read in the previous call,
	  but its LF buddy was only read now.  */
       || (lbuf_len == from && ch == '\r'))
      && lbuf->text[lbuf_len-1] == ch)
    lbuf->length--;
#endif
}


/* dummy function to simplify read_pattern_space(): a read_fn that
   never produces a line.  It is installed whenever there is no open
   input (see closedown() and process_files()), so the first read
   attempt makes read_pattern_space() move on to the next file.
   Always returns 0; `input' is ignored. */
static flagT read_always_fail P_((struct input *));
static flagT
read_always_fail(input)
  struct input *input UNUSED;
{
  return 0;
}

/* The quick-and-easy mmap()'d case: append the next line of the
   mapped input to the pattern space `line' and step past it.
   Returns 1 if a line was appended, 0 if the mapping is used up. */
static flagT read_mem_line P_((struct input *));
static flagT
read_mem_line(input)
  struct input *input;
{
  const char *start = input->cur;
  const char *nl = memchr(start, '\n', input->left);
  const char *next;
  size_t len;

  if (nl)
    {
      /* common case: a complete, newline-terminated line */
      len = nl - start;
      next = nl + 1;
    }
  else
    {
      /* Checking for exhausted input only once the memchr() has
	 failed keeps the test out of the common path; the memchr()
	 fails safely when input->left is 0. */
      if (input->left == 0)
	return 0;

      /* A final line without a newline: take everything left. */
      len = input->left;
      next = start + len;
      if (!*input->file_list && !POSIXLY_CORRECT)
	line.chomped = 0;
    }

  str_append(&line, start, len);
  line_undosify(&line, line.length - len, '\r');
  input->left -= next - start;
  input->cur = next;
  return 1;
}

/* Read from `in' one character at a time, storing at most `buflen'
 * bytes in `buf' and stopping after a newline (which is stored).
 * Returns the number of bytes stored; panics on a read error.
 *
 * This exists because fgets() can't cope with NULs, while fread()
 * buffers too much for interactive (or other unbuffered) uses.
 */
static size_t slow_getline P_((char *buf, size_t buflen, FILE *));
static size_t
slow_getline(buf, buflen, in)
  char *buf;
  size_t buflen;
  FILE *in;
{
  size_t got = 0;

  while (got < buflen)
    {
      int ch = getc(in);

      if (ch == EOF)
	break;
      buf[got++] = ch;
      if (ch == '\n')
	break;
    }

  if (ferror(in))
    panic(_("input read error: %s"), strerror(errno));
  return got;
}

/* Read the next line from input->fp and append it to the pattern
   space `line', going through the look-ahead `buffer'.
   Returns 1 if something was appended to `line' (a complete line, or
   a final line lacking a newline); returns 0 if end of file was hit
   without this call having added anything. */
static flagT read_file_line P_((struct input *));
static flagT
read_file_line(input)
  struct input *input;
{
  char *b;
  size_t blen;
  size_t initial_length = line.length;

  /* The look-ahead buffer is created with no storage (see
     process_files()); give it real storage on first use. */
  if (buffer.alloc == 0)
    {
      FREE(buffer.text);
      line_init(&buffer, FREAD_BUFFER_SIZE);
      buffer.active = buffer.text;
    }
  /* As long as the unconsumed look-ahead contains no newline, move
     all of it into the pattern space and refill the buffer. */
  while (!(b = memchr(buffer.active, '\n', buffer.length)))
    {
      str_append(&line, buffer.active, buffer.length);
      buffer.length = 0;
      if (!feof(input->fp))
	{
	  if (input->no_buffering)
	    buffer.length = slow_getline(buffer.text, buffer.alloc, input->fp);
	  else
	    buffer.length = ck_fread(buffer.text, 1, buffer.alloc, input->fp);
	}

      if (buffer.length == 0)
	{
	  /* End of file.  0x1a is ^Z, stripped only under O_BINARY. */
	  line_undosify(&line, initial_length, 0x1a);
	  if (!*input->file_list && !POSIXLY_CORRECT)
	    line.chomped = 0;
	  /* Did we hit EOF without reading anything?  If so, try
	     the next file; otherwise just go with what we did get. */
	  return (initial_length < line.length);
	}
      buffer.active = buffer.text;
    }

  /* A newline was found at `b': append the text before it, then step
     the look-ahead past the newline itself. */
  blen = b - buffer.active;
  str_append(&line, buffer.active, blen);
  line_undosify(&line, line.length - blen, '\r');
  ++blen;
  buffer.active += blen;
  buffer.length -= blen;
  return 1;
}


/* Write `length' bytes of `text' to `fp', followed by a newline when
   `nl' is set.  Every stream other than stdout is flushed after each
   write; stdout is flushed only when force_unbuffered is set. */
static void output_line P_((const char *, size_t, flagT, FILE *));
static void
output_line(text, length, nl, fp)
  const char *text;
  size_t length;
  flagT nl;
  FILE *fp;
{
  if (length != 0)
    ck_fwrite(text, 1, length, fp);

  if (nl)
    ck_fwrite("\n", 1, 1, fp);

  if (force_unbuffered || fp != stdout)
    ck_fflush(fp);
}

/* Allocate an empty node, link it onto the end of the append queue
   and return it so that the caller can fill in `text' or `rfile'. */
static struct append_queue *next_append_slot P_((void));
static struct append_queue *
next_append_slot()
{
  struct append_queue *slot = MALLOC(1, struct append_queue);

  slot->next = NULL;
  slot->textlen = 0;
  slot->text = NULL;
  slot->rfile = NULL;

  if (!append_tail)
    append_head = slot;		/* the queue was empty */
  else
    append_tail->next = slot;
  append_tail = slot;

  return slot;
}

/* Free every node of the append queue and mark the queue empty.
   The text and file names the nodes refer to are not freed. */
static void release_append_queue P_((void));
static void
release_append_queue()
{
  struct append_queue *node = append_head;

  while (node)
    {
      struct append_queue *following = node->next;

      FREE(node);
      node = following;
    }

  append_tail = NULL;
  append_head = NULL;
}

/* Write everything queued by the "a" and "r" commands to stdout, in
   queue order, then empty the queue.  Queued text is written
   verbatim; a queued file is copied to stdout, and a file that
   cannot be opened is silently skipped. */
static void dump_append_queue P_((void));
static void
dump_append_queue()
{
  struct append_queue *p;

  for (p=append_head; p; p=p->next)
    {
      if (p->text)
	  output_line(p->text, p->textlen, 0, stdout);
      if (p->rfile)
	{
	  char buf[FREAD_BUFFER_SIZE];
	  size_t cnt;
	  FILE *fp;

	  fp = fopen(p->rfile, "r");
	  /* Not ck_fopen() because: "If _rfile_ does not exist or cannot be
	     read, it shall be treated as if it were an empty file, causing
	     no error condition."  IEEE Std 1003.2-1992 */
	  if (fp)
	    {
	      set_read_mode(fp);
	      while ((cnt = ck_fread(buf, 1, sizeof buf, fp)) > 0)
		{
#ifdef O_BINARY
		  /* Remove CRs from CR-LF pairs, and the trailing ^Z.
		     The chunk is compacted in place: `s' reads, `d'
		     writes, and `cnt' is lowered for each byte dropped. */
		  register char *s = buf, *d = buf;
		  register size_t len = cnt;

		  while (len--)
		    {
		      if (*s == '\r' || *s == 0x1a)
			{
			  if (!len)
			    {
			      /* Last byte of this chunk: peek at the
				 stream to see what, if anything, follows. */
			      if (getc(fp) != EOF)
				{
				  if (*s == '\r')
				    {
				      /* Push back both the CR and the
					 peeked byte so that the next
					 chunk sees them together. */
				      fseek(fp, -2L, SEEK_CUR);
				      cnt--;
				    }
				  else
				    {
				      /* A ^Z that is not at end of file
					 is ordinary data: un-peek and
					 keep it. */
				      fseek(fp, -1L, SEEK_CUR);
				      *d++ = *s++;
				    }
				  break;
				}
			      else if (*s == 0x1a)
				{
				  /* ^Z as the very last byte: drop it. */
				  cnt--;
				  break;
				}
			    }
			  else if (s[1] == '\n')
			    {
			      /* Skip this byte; the LF that follows
				 is copied by the statement below. */
			      s++;
			      len--;
			      cnt--;
			    }
			}
		      *d++ = *s++;
		    }
#endif
		  ck_fwrite(buf, 1, cnt, stdout);
		}
	      fclose(fp);
	    }
	}
    }
  release_append_queue();
}


/* Initialize a struct input for the named file.
   The name "-" selects stdin.  If the file cannot be opened, a
   message goes to stderr, input->bad_count is incremented and
   input->fp is left NULL with read_always_fail() as the reader.
   Otherwise the reader is read_mem_line() when map_file() succeeds,
   and read_file_line() when it fails or when force_unbuffered is set. */
static void open_next_file P_((const char *name, struct input *));
static void
open_next_file(name, input)
  const char *name;
  struct input *input;
{
  input->base = NULL;
  /* Give no_buffering a defined value on every path.  Previously it
     was left untouched (uninitialized for the first file, stale for
     later ones) when force_unbuffered was off, map_file() failed and
     HAVE_ISATTY was not defined -- yet read_file_line() reads it. */
  input->no_buffering = 0;
  buffer.length = 0;

  if (name[0] == '-' && name[1] == '\0')
    {
      clearerr(stdin);	/* clear any stale EOF indication */
      input->fp = stdin;
    }
  else if ( ! (input->fp = fopen(name, "r")) )
    {
      /* Fetch the message before calling anything that might
	 change errno. */
      const char *ptr = strerror(errno);
      fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr);
      input->read_fn = read_always_fail; /* a redundancy */
      ++input->bad_count;
      return;
    }

  set_read_mode (input->fp); /* for systems with text/binary schizophrenia */

  input->read_fn = read_file_line;
  if (force_unbuffered)
    input->no_buffering = 1;
  else if (map_file(input->fp, &input->base, &input->length))
    {
      input->cur = VCAST(char *)input->base;
      input->left = input->length;
      input->read_fn = read_mem_line;
    }
#ifdef HAVE_ISATTY
  else
    input->no_buffering = isatty(fileno(input->fp));
#endif
}


/* Release the resources of an input stream that we are done with and
   leave it in the "nothing open" state: fp is NULL and the reader is
   read_always_fail().  Calling it on an already closed stream is
   harmless. */
static void closedown P_((struct input *));
static void
closedown(input)
  struct input *input;
{
  input->read_fn = read_always_fail;

  if (input->fp)
    {
      if (input->base)
	unmap_file(input->base, input->length);
      /* stdin stays open: it can be reused on tty and tape devices */
      if (input->fp != stdin)
	ck_fclose(input->fp);
      input->fp = NULL;
    }
}

/* Fetch the next input line into the pattern space, moving on to
   further input files as needed.  With `append' set the new text is
   added to what the pattern space already holds, otherwise it
   replaces it.  Pending "a"/"r" output is written out first.
   Returns zero if there is nothing left to read. */
static flagT read_pattern_space P_((struct input *, flagT append));
static flagT
read_pattern_space(input, append)
  struct input *input;
  flagT append;
{
  /* dump_append_queue() copes with an empty queue by itself; the
     test merely spares the call in the common case. */
  if (append_head)
    dump_append_queue();

  replaced = 0;
  if (!append)
    line.length = 0;
  line.chomped = 1;  /* assumed, until a reader finds otherwise */

  for (;;)
    {
      if ((*input->read_fn)(input))
	break;

      /* The current file is exhausted (or nothing is open yet). */
      closedown(input);
      if (!*input->file_list)
	{
	  line.chomped = 0;
	  return 0;
	}
      open_next_file(*input->file_list++, input);
    }

  ++input->line_number;
  return 1;
}


/* Close the current input, then open the remaining files one after
   another until one that contains data turns up.  Returns 0 if such
   a file was found (it is left open as the current input), or 1 if
   the file list ran out first. */
static flagT last_file_with_data_p P_((struct input *));
static flagT
last_file_with_data_p(input)
  struct input *input;
{
  int ch;

  closedown(input);
  while (*input->file_list)
    {
      open_next_file(*input->file_list++, input);
      if (input->fp)
	{
	  if (input->base)
	    {
	      if (input->left > 0)
		return 0;
	    }
	  else
	    {
	      /* Peek at one character, putting it back if present. */
	      ch = getc(input->fp);
	      if (ch != EOF)
		{
		  ungetc(ch, input->fp);
		  return 0;
		}
	    }
	}
      /* Unopenable or empty: drop it and try the next one. */
      closedown(input);
    }
  return 1;
}

/* Determine if we match the `$' address, i.e. whether the line now
   in the pattern space is the last line of all the input.  This may
   have to close the current file and open later ones in order to
   find out (see last_file_with_data_p()). */
static flagT test_dollar_EOF P_((struct input *));
static flagT
test_dollar_EOF(input)
  struct input *input;
{
  int ch;

  /* Look-ahead data is still waiting to be consumed. */
  if (buffer.length != 0)
    return 0;

  if (input->fp && input->base)
    return (input->left == 0 && last_file_with_data_p(input));

  if (input->fp && !feof(input->fp))
    {
      /* Peek at the stream to find out whether more data follows. */
      ch = getc(input->fp);
      if (ch != EOF)
	{
	  ungetc(ch, input->fp);
	  return 0;
	}
    }

  /* Nothing is open, or the current file is exhausted. */
  return last_file_with_data_p(input);
}

/* Return non-zero if the current line matches the address
   pointed to by `addr'.  `input' supplies the current line number
   and, for the `$' address, the end-of-input test.  Panics on an
   unknown address type. */
static flagT match_an_address_p P_((struct addr *, struct input *));
static flagT
match_an_address_p(addr, input)
  struct addr *addr;
  struct input *input;
{
  switch (addr->addr_type)
    {
    case addr_is_null:
      return 1;

    case addr_is_regex:
      return match_regex(addr->addr_regex, line.text, line.length, 0, NULL);

    case addr_is_num:
      return (addr->addr_number == input->line_number);

    case addr_is_num_mod:
      /* first~step: lines first, first+step, first+2*step, ...
	 The earlier code, once line `first' had been reached with
	 first >= step, answered (first % step == 0) regardless of
	 the line number: `5~2' missed line 5, and `4~2' matched
	 line 5 if that was the first line it was tested against.
	 It also reduced addr_number in place, losing `first'. */
      if (addr->addr_step == 0)
	/* Avoid dividing by zero.  NOTE(review): treating a zero step
	   as a plain line number is an assumption -- confirm against
	   what the script compiler can emit. */
	return (addr->addr_number == input->line_number);
      return (input->line_number >= addr->addr_number
	      && ((input->line_number - addr->addr_number)
		  % addr->addr_step) == 0);

    case addr_is_num2:
    case addr_is_step:
    case addr_is_step_mod:
      /* reminder: these are only meaningful for a2 addresses */
      /* a2->addr_number needs to be recomputed each time a1 address
         matches for the step and step_mod types */
      return (addr->addr_number <= input->line_number);

    case addr_is_last:
      return test_dollar_EOF(input);

    default:
      panic(_("INTERNAL ERROR: bad address type"));
    }
  /*NOTREACHED*/
  return 0;
}

/* Return non-zero if `cmd' applies to the current line, taking its
   one- or two-address range and a trailing `!' into account.
   cmd->a1_matched records, from one line to the next, that we are
   inside a range whose end address has not been reached yet. */
static flagT match_address_p P_((struct sed_cmd *, struct input *));
static flagT
match_address_p(cmd, input)
  struct sed_cmd *cmd;
  struct input *input;
{
  flagT in_range = cmd->a1_matched;

  if (in_range)
    {
      /* Inside an open range: this line still belongs to it, but
	 the range closes if the line matches the end address. */
      if (match_an_address_p(cmd->a2, input))
	cmd->a1_matched = 0;
    }
  else if (match_an_address_p(&cmd->a1, input))
    {
      in_range = 1;
      if (cmd->a2)
	{
	  /* A range opens here. */
	  cmd->a1_matched = 1;
	  if (cmd->a2->addr_type == addr_is_step)
	    cmd->a2->addr_number = input->line_number + cmd->a2->addr_step;
	  else if (cmd->a2->addr_type == addr_is_step_mod)
	    cmd->a2->addr_number = input->line_number + cmd->a2->addr_step
				   - (input->line_number%cmd->a2->addr_step);
	  else if (cmd->a2->addr_type != addr_is_regex
		   && match_an_address_p(cmd->a2, input))
	    /* The end address is already satisfied, so the range
	       consists of this one line.  (A regex end address is
	       never tried against the line that opened the range.) */
	    cmd->a1_matched = 0;
	}
    }

  return cmd->addr_bang ? !in_range : in_range;
}


/* Implement the `l' command: write the pattern space to stdout in an
   unambiguous form.  Printable characters are written as they are
   (a backslash is doubled), the usual control characters as C-style
   escapes and everything else as a three-digit octal escape.  When
   lcmd_out_line_len is positive, long output is folded with a
   trailing backslash.  The end of the text is marked with `$'. */
static void do_list P_((void));
static void
do_list()
{
  const unsigned char *text = CAST(unsigned char *)line.text;
  countT remaining;
  countT column = 0;
  char obuf[180];	/* vastly more than one escape sequence needs */

  for (remaining = line.length; remaining != 0; --remaining, ++text)
    {
      unsigned char c = *text;
      char *o = obuf;
      size_t olen;

      if (!ISPRINT(c))
	{
	  *o++ = '\\';
	  switch (c)
	    {
#if defined __STDC__ && __STDC__-0
	    case '\a': *o++ = 'a'; break;
#else /* Not STDC; we'll just assume ASCII */
	    case 007:  *o++ = 'a'; break;
#endif
	    case '\b': *o++ = 'b'; break;
	    case '\f': *o++ = 'f'; break;
	    case '\n': *o++ = 'n'; break;
	    case '\r': *o++ = 'r'; break;
	    case '\t': *o++ = 't'; break;
	    case '\v': *o++ = 'v'; break;
	    default:
	      sprintf(o, "%03o", c);
	      o += strlen(o);
	      break;
	    }
	}
      else
	{
	  *o++ = c;
	  if (c == '\\')
	    *o++ = '\\';
	}

      /* Fold the output line before this item if it would not fit. */
      olen = o - obuf;
      if (lcmd_out_line_len > 0 && column+olen >= lcmd_out_line_len)
	{
	  ck_fwrite("\\\n", 1, 2, stdout);
	  column = 0;
	}
      ck_fwrite(obuf, 1, olen, stdout);
      column += olen;
    }

  ck_fwrite("$\n", 1, 2, stdout);
}

/* Implement the `s' command on the pattern space.
   `sub' describes the compiled command: the regex, the replacement
   pieces, which match to replace (sub->numb), whether to go on
   replacing (sub->global), and the optional `p' and `w' actions.
   When at least one replacement is made, the pattern space is
   swapped for the result, the `p'/`w' output is produced and
   `replaced' is set for the `t' command; otherwise the pattern
   space is left as it was.
   Under DEBUG_LEAKS, calling this with a null `sub' merely frees the
   static accumulator buffer. */
static void do_subst P_((struct subst *));
static void
do_subst(sub)
  struct subst *sub;
{
  /* static so as to keep malloc() calls down */
  static struct line s_accum;	/* accumulate the result of the s command. */
  size_t start = 0;	/* where to start scan for (next) match in LINE */
  countT count = 0;	/* number of matches found */
  flagT did_subst = 0;
#define MAX_BACKREFERENCES 10
  regoff_t begbuf[MAX_BACKREFERENCES+1];
  regoff_t endbuf[MAX_BACKREFERENCES+1];
  struct re_registers regs;

#ifdef DEBUG_LEAKS
  /* A hack of a mechanism to free the static buffer for
     the DEBUG_LEAKS cleanup.  Only s_accum needs releasing: `regs'
     is an automatic variable which is still uninitialized at this
     point, and its start/end buffers are the stack arrays above, so
     there is nothing to FREE() there (doing so would hand garbage
     pointers to the allocator). */
  if (!sub)
    {
      FREE(s_accum.text);
      s_accum.text = NULL;
      s_accum.alloc = 0;
      return;
    }
#endif /*DEBUG_LEAKS*/
  if (s_accum.alloc == 0)
    line_init(&s_accum, INITIAL_BUFFER_SIZE);
  s_accum.length = 0;

  regs.num_regs = MAX_BACKREFERENCES+1;
  regs.start = begbuf;
  regs.end = endbuf;

  while (match_regex(sub->regx, line.text, line.length, start, &regs))
    {
      size_t offset = regs.start[0];
      ++count;

      /* Copy stuff to the left of this match into the output string. */
      if (start < offset)
	str_append(&s_accum, line.text + start, offset - start);

      /* If we're counting up to the Nth match, are we there yet? */
      if (count < sub->numb)
	{
	  /* Not there yet...so skip this match. */
	  size_t matched = regs.end[0] - regs.start[0];

	  /* If the match was vacuous, skip ahead one character
	   * anyway.   It isn't at all obvious to me that this is
	   * the right behavior for this case.    -t	XXX
	   */
	  if (matched == 0 && offset < line.length)
	    matched = 1;

	  str_append(&s_accum, line.text + offset, matched);
	  start = offset + matched;
	  continue;
	}

      /* Expand the replacement string into the output string. */
      {
	struct replacement *p;

	for (p=sub->replacement; p; p=p->next)
	  {
	    int i = p->subst_id;
	    if (p->prefix_length)
	      str_append(&s_accum, p->prefix, p->prefix_length);
	    /* A negative id means "literal text only".  A group with
	       nothing to contribute (empty, or with end <= start as
	       for a group that took no part in the match) is skipped
	       outright, so that no pointer is ever formed from a
	       negative offset; the output is the same as appending
	       zero bytes. */
	    if (0 <= i && regs.start[i] < regs.end[i])
	      str_append(&s_accum, line.text+regs.start[i],
			 CAST(size_t)(regs.end[i]-regs.start[i]));
	  }
      }

      did_subst = 1;
      start = regs.end[0];
      if (!sub->global || start == line.length)
	break;

      /* If the match was vacuous, skip over one character
       * and add that character to the output.
       */
      if (regs.start[0] == regs.end[0])
	{
	  str_append(&s_accum, line.text + offset, 1);
	  ++start;
	}
    }

  if (did_subst)
    {
      /* Copy stuff to the right of the last match into the output string. */
      if (start < line.length)
	str_append(&s_accum, line.text + start, line.length-start);
      s_accum.chomped = line.chomped;

      /* Exchange line and s_accum.  This can be much cheaper
	 than copying s_accum.text into line.text (for huge lines). */
      line_exchange(&line, &s_accum);

      /* Finish up. */
      if (sub->wfile)
	output_line(line.text, line.length, line.chomped, sub->wfile);
      if (sub->print)
	output_line(line.text, line.length, line.chomped, stdout);
      replaced = 1;
    }
}

#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
/* Used to attempt a simple-minded optimization. */

static countT branches;

static countT count_branches P_((struct vector *));
static countT
count_branches(program)
  struct vector *program;
{
  struct sed_cmd *cur_cmd = program->v;
  countT isn_cnt = program->v_length;
  countT cnt = 0;

  while (isn_cnt-- > 0)
    {
      switch (cur_cmd->cmd)
	{
	case 'b':
	case 't':
	case '{':
	  ++cnt;
	}
    }
  return cnt;
}

/* Squeeze every `:' command out of the program `vec', moving the
   remaining commands down and adjusting vec->v_length.  Returns a
   pointer to the slot that now holds the first surviving command
   that was at or after `cur_cmd' (one past the end if there was
   none), or a null pointer if the program has become empty. */
static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *));
static struct sed_cmd *
shrink_program(vec, cur_cmd)
  struct vector *vec;
  struct sed_cmd *cur_cmd;
{
  struct sed_cmd *src = vec->v;
  struct sed_cmd *dst = vec->v;
  struct sed_cmd *stop = vec->v + vec->v_length;
  countT kept_before;

  /* First the commands ahead of cur_cmd ... */
  while (src < cur_cmd)
    {
      if (src->cmd != ':')
	{
	  MEMCPY(dst, src, sizeof *dst);
	  ++dst;
	}
      ++src;
    }
  kept_before = dst - vec->v;

  /* ... then cur_cmd itself and everything after it. */
  while (src < stop)
    {
      if (src->cmd != ':')
	{
	  MEMCPY(dst, src, sizeof *dst);
	  ++dst;
	}
      ++src;
    }
  vec->v_length = dst - vec->v;

  if (0 < vec->v_length)
    return vec->v + kept_before;
  return CAST(struct sed_cmd *)0;
}
#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/

/* Execute the program `vec' on the current input line.
   Return non-zero if caller should quit, 0 otherwise.
   Non-zero is returned for the `q' command and when `n' or `N' find
   no more input; 0 is returned when the end of the script is reached
   and when `c', `d' or `D' (on a pattern space without a newline)
   ends the cycle early.  Whether the pattern space is then printed
   is up to the caller (see process_files()). */
static flagT execute_program P_((struct vector *, struct input *));
static flagT
execute_program(vec, input)
  struct vector *vec;
  struct input *input;
{
  struct sed_cmd *cur_cmd;
  struct sed_cmd *end_cmd;

  cur_cmd = vec->v;
  end_cmd = vec->v + vec->v_length;
  while (cur_cmd < end_cmd)
    {
      if (match_address_p(cur_cmd, input))
	{
	  switch (cur_cmd->cmd)
	    {
	    case 'a':
	      /* Queue the text; it is written out by the next
		 read_pattern_space() call, via dump_append_queue(). */
	      {
		struct append_queue *aq = next_append_slot();
		aq->text = cur_cmd->x.cmd_txt.text;
		aq->textlen = cur_cmd->x.cmd_txt.text_length;
	      }
	      break;

	    case '{':
	    case 'b':
	      /* Jump; the `continue' bypasses the ++cur_cmd below. */
	      cur_cmd = vec->v + cur_cmd->x.jump_index;
	      continue;

	    case '}':
	    case ':':
	      /* Executing labels and block-ends are easy. */
	      break;

	    case 'c':
	      /* a1_matched is still set while inside an unfinished
		 address range, so the text is written only once the
		 range has ended (or when there is no range). */
	      if (!cur_cmd->a1_matched)
		output_line(cur_cmd->x.cmd_txt.text,
			    cur_cmd->x.cmd_txt.text_length, 0, stdout);
	      /* POSIX.2 is silent about c starting a new cycle,
		 but it seems to be expected (and make sense). */
	      /* Fall Through */
	    case 'd':
	      /* Empty the pattern space so that the caller's default
		 output writes nothing, not even a newline. */
	      line.length = 0;
	      line.chomped = 0;
	      return 0;

	    case 'D':
	      {
		char *p = memchr(line.text, '\n', line.length);
		if (!p)
		  {
		    /* No newline: behave exactly like `d'. */
		    line.length = 0;
		    line.chomped = 0;
		    return 0;
		  }
		/* Delete up to and including the first newline. */
		++p;
		line.length -= p - line.text;
		memmove(line.text, p, line.length);

		/* reset to start next cycle without reading a new line: */
		cur_cmd = vec->v;
		continue;
	      }

	    case 'g':
	      line_copy(&hold, &line);
	      break;

	    case 'G':
	      line_append(&hold, &line);
	      break;

	    case 'h':
	      line_copy(&line, &hold);
	      break;

	    case 'H':
	      line_append(&line, &hold);
	      break;

	    case 'i':
	      /* Unlike `a', the text is written immediately. */
	      output_line(cur_cmd->x.cmd_txt.text,
			  cur_cmd->x.cmd_txt.text_length, 0, stdout);
	      break;

	    case 'l':
	      do_list();
	      break;

	    case 'n':
	      if (!no_default_output)
		output_line(line.text, line.length, line.chomped, stdout);
	      if (!read_pattern_space(input, 0))
		return 1;
	      break;

	    case 'N':
	      /* The separating newline is added before the read, so it
		 remains in the pattern space if there is no next line. */
	      str_append(&line, "\n", 1);
	      if (!read_pattern_space(input, 1))
		return 1;
	      break;

	    case 'p':
	      output_line(line.text, line.length, line.chomped, stdout);
	      break;

	    case 'P':
	      /* Print up to the first newline, or everything if the
		 pattern space holds none. */
	      {
		char *p = memchr(line.text, '\n', line.length);
		output_line(line.text, p ? p - line.text : line.length,
			    p ? 1 : line.chomped, stdout);
	      }
	      break;

	    case 'q':
	      return 1;

	    case 'r':
	      /* Queue the file name; the file is not opened until the
		 queue is dumped. */
	      if (cur_cmd->x.rfile)
		{
		  struct append_queue *aq = next_append_slot();
		  aq->rfile = cur_cmd->x.rfile;
		}
	      break;

	    case 's':
	      do_subst(cur_cmd->x.cmd_subst);
	      break;

	    case 't':
	      /* Branch only if a substitution has been made since the
		 last input line was read or the last `t' was taken. */
	      if (replaced)
		{
		  replaced = 0;
		  cur_cmd = vec->v + cur_cmd->x.jump_index;
		  continue;
		}
	      break;

	    case 'w':
	      if (cur_cmd->x.wfile)
		output_line(line.text, line.length,
			    line.chomped, cur_cmd->x.wfile);
	      break;

	    case 'x':
	      line_exchange(&line, &hold);
	      break;

	    case 'y':
	      /* Map each byte of the pattern space through the
		 command's translation table, in place. */
	      {
		unsigned char *p, *e;
		p = CAST(unsigned char *)line.text;
		for (e=p+line.length; p<e; ++p)
		  *p = cur_cmd->x.translate[*p];
	      }
	      break;

	    case '=':
	      printf("%lu\n", CAST(unsigned long)input->line_number);
	      break;

	    default:
	      panic(_("INTERNAL ERROR: Bad cmd %c"), cur_cmd->cmd);
	    }
	}
#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
/* If our top-level program consists solely of commands with addr_is_num
 * addresses then once we past the last mentioned line we should be able
 * to quit if no_default_output is true, or otherwise quickly copy input
 * to output.  Now whether this optimization is a win or not depends
 * on how cheaply we can implement this for the cases where it doesn't
 * help, as compared against how much time is saved.
 * One semantic difference (which I think is an improvement) is
 * that *this* version will terminate after printing line two
 * in the script "yes | sed -n 2p".
 */
      else
	{
	  /* can we ever match again? */
	  if (cur_cmd->a1.addr_type == addr_is_num &&
	      ((input->line_number < cur_cmd->a1.addr_number)
	       != !cur_cmd->addr_bang))
	    {
	      /* skip all this next time */
	      cur_cmd->a1.addr_type = addr_is_null;
	      cur_cmd->addr_bang = 1;
	      /* can we make an optimization? */
	      if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't'
		  || cur_cmd->cmd == '{')
		--branches;
	      cur_cmd->cmd = ':';	/* replace with no-op */
	      if (branches == 0)
		{
		  /* whew!  all that just so that we can get to here! */
		  cur_cmd = shrink_program(vec, cur_cmd);
		  if (!cur_cmd && no_default_output)
		    return 1;
		  end_cmd = vec->v + vec->v_length;
		  if (!cur_cmd)
		    cur_cmd = end_cmd;
		  continue;
		}
	    }
	}
#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/

      /* this is buried down here so that a "continue" statement can skip it */
      ++cur_cmd;
    }
    return 0;
}



/* Apply the compiled script to all the named files.
   `argv' is a NULL-terminated list of file names; if it is NULL or
   empty, standard input ("-") is read instead.  Each input line is
   run through `the_program' and then written to stdout unless
   no_default_output is set.
   Returns the number of input files that could not be opened. */
countT
process_files(the_program, argv)
  struct vector *the_program;
  char **argv;
{
  static char dash[] = "-";
  static char *stdin_argv[2] = { dash, NULL };
  struct input input;

  line_init(&line, INITIAL_BUFFER_SIZE);
  line_init(&hold, 0);
  line_init(&buffer, 0);

#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
  branches = count_branches(the_program);
#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
  input.file_list = stdin_argv;
  if (argv && *argv)
    input.file_list = argv;
  input.bad_count = 0;
  input.line_number = 0;
  input.read_fn = read_always_fail;
  input.fp = NULL;
  /* Nothing is open yet, so the fields below are not meaningful, but
     give them defined values all the same rather than leaving stack
     garbage in a structure that gets passed around so widely
     (no_buffering in particular is read by read_file_line()). */
  input.base = NULL;
  input.cur = NULL;
  input.length = 0;
  input.left = 0;
  input.no_buffering = 0;

  while (read_pattern_space(&input, 0))
    {
      flagT quit = execute_program(the_program, &input);
      /* The pattern space is printed even on the cycle that quits. */
      if (!no_default_output)
	output_line(line.text, line.length, line.chomped, stdout);
      if (quit)
	break;
    }
  closedown(&input);

#ifdef DEBUG_LEAKS
  /* We're about to exit, so these free()s are redundant.
     But if we're running under a memory-leak detecting
     implementation of malloc(), we want to explicitly
     deallocate in order to avoid extraneous noise from
     the allocator. */
  release_append_queue();
  do_subst(NULL);
  FREE(buffer.text);
  FREE(hold.text);
  FREE(line.text);
#endif /*DEBUG_LEAKS*/
  return input.bad_count;
}
