/*
 * This software is provided as is without any warranty of any kind.
 * You are free to copy and distribute it as you like in source or
 * binary form provided that this notice and author credits remain intact
 * in the source.  You are also free to alter, enhance, embellish, or 
 * otherwise change its content provided that the credits to the
 * original author remain, and that any changes are commented stating
 * the change made and the author of that change.  You may NOT profit in
 * any way from the sale or distribution of this software in its original
 * or altered form.
 */

/*
 * Program : scanfile.c (include scanfile.h)
 * Author  : Jeffrey M. Metcalf
 * Date    : 14-May-98
 * Re      : scans an arbitrary larger file (bfile) for occurrences
 *           of a smaller (sfile).  Extremely configurable.
 */

#include <sys/stat.h>

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

/*
 * Leave the following define active when compiling with the
 * Cygnus B-19 GNU environment for Win32; comment it out otherwise.
 */

#define WIN32 


/*
 * Under WIN32 the option parser getoptux() is pulled in by including
 * 'getoptux.c' (Cygnus B-19 GNU environment).
 * May or may not be applicable in other GNU Win32 
 * environments.
 */

#ifdef WIN32
#include "getoptux.c"
#endif



#include "scanfile.h"

/*
 * Program-wide state.  main() fills these in (mostly from the command
 * line); the scanning and reporting routines below read them.
 */
char *sbuf;              /* contents of the smaller file, as returned by bufread() */
char *bbuf;              /* contents of the larger file, as returned by bufread()  */
char *ofile;             /* basename of the output file name(s); argument of -o */
char *progname;          /* argv[0]; used as the prefix of diagnostic messages */
char *bfile;             /* name of the larger file */
char *sfile;             /* name of the smaller file */

/*
 * Option flags, one per command-line switch; each is set to 1 when its
 * switch is seen.  tflag is additionally incremented to 2 by main() once
 * a trailing newline has actually been stripped from the smaller file.
 */
int aflag, bflag, tflag, dflag, qflag, vflag, fflag, cflag, oflag;

unsigned long offset;  /* offset into bbuf of the scan currently in progress */
unsigned long snum;    /* offset of the first scan; argument of -s */
unsigned long incr=1l; /* amount added to offset after each scan; argument of -i */
unsigned long flim;    /* maximum acceptable fail count; argument of -f */
unsigned long cflim;   /* maximum acceptable consecutive fail count; argument of -c */

unsigned long vnum;    /* argument of -v: the status line is printed whenever */
                       /* offset is a multiple of incr*vnum */

struct scan *match;    /* dummy scan that never holds file data: its fail   */
                       /* count is set to 'flim' and its consecutive fail   */
                       /* count to 'cflim', so judgebest() can compare a    */
                       /* real scan against the user's limits               */

/*
 * main -- parse the command line, load both files and drive the scan.
 *
 * The non-option arguments are 'bfile sfile'.  Starting at the offset given
 * by -s (default 0) and stepping by -i (default 1), sfile is compared with
 * every slice of bfile that is as long as sfile.  Unless -a or -b is given
 * the scan stops at the first slice that satisfies the -f/-c limits.
 *
 * Returns 0 when the scan that is finally judged satisfies the limits and
 * non-zero otherwise.  All usage and I/O errors terminate via exit(-1).
 */
int
main(int argc, char *argv[])
{
	extern int optind;
	extern char *optarg;

	struct scan *cscan;     /* represents the current scan status */
	struct scan *best;      /* represents the best observed scan status */
	int ch;
	unsigned long bufsize;  /* the size of sfile and hence its buffer */
	unsigned long bbufsize; /* the size of bfile and hence its buffer */
	unsigned long lastoff;  /* highest offset at which sfile still fits */
	unsigned long vstep = 0l; /* -v prints when offset is a multiple of this */

	setlocale(LC_CTYPE, "");

	/* get progname from *argv */
	progname = *argv;

	while ((ch = getoptux(argc, argv, "abtdqv:f:c:s:i:o:?h")) != EOF) {
		switch (ch) {
		case 'a':           /* give me info on all matches */
			aflag = 1;
			break;
		case 'b':           /* tell me best match, no matter how bad */
			bflag = 1;
			break;
		case 't':           /* if user knows that sfile is text */
			tflag = 1;  /* instruct program to strip the final */
			break;      /* newline.  Not implemented in Win32 */
		case 'd':           /* display all error offsets */
			dflag = 1;  /* relative to sfile         */
			break;
		case 'q':           /* be quiet about reporting matches */
			qflag = 1;  /* useful when only return code matters */
			break;
		case 'v':           /* tell me about the scan progress */
			vflag = 1;
			vnum = strtoul(optarg, NULL, 0);
			break;
		case 'f':           /* set largest number of acceptable fails */
			fflag = 1;  /* default is 0 */
			flim = strtoul(optarg, NULL, 0);
			break;
		case 'c':           /* set largest number of acceptable */
			cflag = 1;  /* consecutive fails; default is 0  */
			cflim = strtoul(optarg, NULL, 0);
			break;
		case 's':           /* set the offset for the first scan */
			snum = strtoul(optarg, NULL, 0); /* default is 0 */
			/*
			 * Assign rather than accumulate: the scan loop
			 * recognises the first scan by offset == snum, and
			 * a repeated -s must not break that invariant.
			 */
			offset = snum;
			break;
		case 'i':           /* increment each scan by 'incr' */
			incr = strtoul(optarg, NULL, 0); /* default is 1 */
			break;
		case 'o':           /* set the basename for an output file */
			oflag = 1;
			ofile = optarg;
			break;
		case '?':
		case 'h':
		default:
			usage();
		}
	}
	argc -= optind;
	argv += optind;

	if (argc != 2)
		usage();

	/* A zero increment would never advance the scan. */
	if (incr == 0l) {
		(void)fprintf(stderr,
			"%s: incr must be greater than 0\n", progname);
		exit(-1);
	}
	/* A zero status interval would be used as a modulus below. */
	if (vflag && vnum == 0l) {
		(void)fprintf(stderr,
			"%s: vn must be greater than 0\n", progname);
		exit(-1);
	}
	/* Saturate instead of letting incr*vnum wrap around (possibly to 0). */
	if (vflag)
		vstep = (vnum > ULONG_MAX / incr) ? ULONG_MAX : incr * vnum;

/*
 * It's far too much work recalculating buffersize from stat info for cr-lf
 * files.  The functionality of -t just isn't worth the trouble.
 */

#ifdef WIN32
	if (tflag) {
		(void)fprintf(stderr,
		   "%s: the -t option is not supported under Win32\n", progname);
		exit(-1);
	}
#endif

	bfile = *argv;
	bbufsize = bufinfo(*argv);       /* tell me the big file's size    */
	bbuf = bufread(*argv, bbufsize); /* point to the big file's buffer */

	argc--;
	argv++;

	sfile = *argv;
	bufsize = bufinfo(*argv);        /* tell me the small file's size    */
	sbuf = bufread(*argv, bufsize);  /* point to the small file's buffer */

	if (bbufsize < bufsize) {        /* don't dyslexic be */
		(void)fprintf(stderr,
			"%s: %s is smaller than %s\n", progname, bfile, sfile);
		exit(-1);
	}

	if (cflag && (cflim == 0l)) {  /* set cfl=1 to disallow consec errors */
		(void)fprintf(stderr,
			"%s: to check for no consecutive errors -- set cfl = 1\n",
				progname);
		exit(-1);
	}
	if (fflag && cflim > flim) {     /* consecutive error count can    */
	                                 /* never exceed total error count */
		(void)fprintf(stderr,
			"%s: cfl cannot exceed fl when -f is selected -- setting fl=cfl\n", progname);
		flim = cflim;
	}

	/*
	 * -t: drop sfile's final newline from the comparison.  tflag becomes 2
	 * so getbuf()/printbest() know to put it back.  The newline is kept
	 * when it is the only byte, so the pattern never becomes empty.
	 */
	if (tflag && bufsize > 1 && (sbuf[bufsize-1] == '\n')) {
		bufsize--;
		tflag++;
	}

	/*
	 * bbufsize >= bufsize was checked above, so this cannot wrap.  Reject
	 * a start offset at which sfile no longer fits: scanning from there
	 * would read past the end of bbuf.
	 */
	lastoff = bbufsize - bufsize;
	if (offset > lastoff) {
		(void)fprintf(stderr,
			"%s: start offset is beyond the last possible match in %s\n",
			progname, bfile);
		exit(-1);
	}

	cscan = salloc(bufsize);
	if (oflag)
		getbuf(cscan->buf, bufsize);
	best = cscan;
	match = salloc(0l);                        /* 'match' is a pointer to */
	match->fail = flim;                        /* a scan that satisfies   */
	match->cfail = cflim;                      /* error count limits      */
	if (vflag)
		printf("Scanning...\n");

	for (;;) {
		if (vflag && offset % vstep == 0l)
			printf("Offset = 0x%8lx\n", offset);

		if (cmpbuf(cscan, best, bufsize)) {
			/*
			 * On the very first scan best and cscan are the same
			 * object, so there is no old best to release.
			 */
			if (offset != snum)
				freescan(best);
			best = cscan;             /* this one's better    */
			cscan = salloc(bufsize);  /* get me a new scan    */
		}

		/* Without -a/-b the first scan within the limits ends the run. */
		if (!aflag && !bflag && judgebest(best, match)) {
			printbest(best, bufsize);
			return 0;
		}

		/* Stop before offset would pass lastoff (or wrap around). */
		if (incr > lastoff - offset)
			break;
		offset += incr;
	}

	if (bflag) {                 /* do we want to see the best observed */
				     /* match?  If so it will always be the */
				     /* last one we find with error counts  */
				     /* no worse than the best we have seen */
		if (!qflag) {
			(void)fprintf(stdout,
				"***************\nLast Best Match\n***************\n");
		}
		/* printbest() honours -q itself and still handles -o output. */
		printbest(best, bufsize);
	}
	return (!judgebest(best, match));
}


/*
 * bufinfo -- return the size in bytes of the file named 'filen'.
 *
 * The size is taken from stat().  If the file cannot be stat'ed, or if it
 * is empty, a diagnostic is written to stderr and the program exits with
 * status -1, so the returned value is always non-zero.
 */
unsigned long bufinfo(char *filen)
{
	struct stat info;
	unsigned long nbytes;

	if (stat(filen, &info) == -1) {
		(void)fprintf(stderr,
			"%s: can't stat %s\n", progname, filen);
		exit(-1);
	}

	nbytes = (unsigned long)info.st_size;
	if (nbytes == 0l) {
		(void)fprintf(stderr,
			"%s: empty file %s\n", progname, filen);
		exit(-1);
	}

	return nbytes;
}

/*
 * bufread -- read the first 'bs' bytes of file 'filen' into a freshly
 * malloc'ed buffer and return that buffer.
 *
 * Failure to open the file, to allocate the buffer or to read all 'bs'
 * bytes is fatal (diagnostic on stderr, exit status -1).  A failing
 * fclose() is only reported; the buffer is still returned.
 */
char *bufread(char *filen, unsigned long bs)
{
#ifdef WIN32
	const char *openmode = "rb";   /* keep DOS from translating cr-lf */
#else
	const char *openmode = "r";
#endif
	FILE *in;
	char *data;

	in = fopen(filen, openmode);
	if (in == NULL) {
		(void)fprintf(stderr,
			"%s: can't open %s\n", progname, filen);
		exit(-1);
	}

	data = (char *)malloc(bs);
	if (data == NULL) {
		(void)fprintf(stderr,
			"%s: can't malloc buffer for %s\n", progname, filen);
		exit(-1);
	}

	/* the whole file is requested as a single item of bs bytes */
	if (fread(data, bs, 1, in) != 1) {
		(void)fprintf(stderr,
			"%s: error reading %s\n", progname, filen);
		exit(-1);
	}

	if (fclose(in) == EOF) {
		(void)fprintf(stderr,
			"%s: error closing %s\n", progname, filen);
	}

	return data;
}

/*
 * judgebest -- decide whether scan 'c' is at least as good as scan 'b'.
 *
 * Which counters take part depends on the options given:
 *   -c and -f : both the fail count and the consecutive fail count of 'c'
 *               must not exceed those of 'b';
 *   -c only   : only the consecutive fail count is considered, so the
 *               total fail count is unbounded;
 *   otherwise : only the total fail count is considered.
 *
 * Returns BEST when 'c' qualifies and NOTBEST when it does not.
 */
int judgebest(struct scan *c, struct scan *b)
{
	if (cflag) {
		if (c->cfail > b->cfail)
			return NOTBEST;
		if (fflag && c->fail > b->fail)
			return NOTBEST;
		return BEST;
	}

	return (c->fail <= b->fail) ? BEST : NOTBEST;
}

/*
 * cmpbuf -- run one scan: compare sfile (sbuf) with the 'bs' bytes of
 * bfile (bbuf) that start at the global 'offset', recording the result
 * in 'c'.
 *
 * c->os is set to the offset, c->fail to the number of differing bytes
 * and c->cfail to the longest run of consecutive differing bytes.  With
 * -d the index of every differing byte is stored in c->diffs.
 *
 * Unless -b is given, any scan other than the first one (offset == snum)
 * is abandoned with NOTBEST as soon as it no longer satisfies the limits
 * held in 'match'.
 *
 * A completed scan that beats 'b' or satisfies 'match' has its slice of
 * bbuf captured into c->buf when -o is set; with -a every scan that
 * satisfies 'match' is printed.
 *
 * Returns BEST for the first scan when -b is not set; otherwise the
 * verdict of judgebest(c, b).
 */
int cmpbuf(struct scan *c, struct scan *b, unsigned long bs)
{
	unsigned long pos;
	unsigned long run = 0l;          /* length of the current fail run */
	int first_scan = ((offset - snum) == 0l);
	int beats_best;
	int meets_limits;

	c->os = offset;
	c->fail = 0;
	c->cfail = 0;

	for (pos = 0l; pos < bs; pos++) {
		if (bbuf[pos + offset] == sbuf[pos]) {
			/* a matching byte ends the current run of failures */
			if (run > c->cfail)
				c->cfail = run;
			run = 0l;
		} else {
			if (dflag)
				(c->diffs)[c->fail] = pos;
			(c->fail)++;
			run++;
		}

		/* the initial scan is always carried through to the end */
		if (!bflag && !first_scan && !judgebest(c, match))
			return NOTBEST;
	}

	/* account for a run of failures that reaches the end of the slice */
	if (run > c->cfail)
		c->cfail = run;

	beats_best = judgebest(c, b);        /* better than the best so far? */
	meets_limits = judgebest(c, match);  /* within the user's limits?    */

	if ((beats_best || meets_limits) && oflag)
		getbuf(c->buf, bs);          /* keep this slice for output   */

	if (aflag && meets_limits)
		printbest(c, bs);

	if (!bflag && first_scan)
		return BEST;

	return beats_best;
}

/*
 * getbuf -- copy the 'max' bytes of bbuf that start at the global
 * 'offset' into 'nbuf'.
 *
 * When -t stripped a newline (tflag > 1) a '\n' is stored at nbuf[max]
 * as well, so in that case 'nbuf' must have room for max+1 bytes.
 */
void getbuf(char *nbuf, unsigned long max)
{
	memcpy(nbuf, bbuf + offset, max);

	if (tflag > 1)
		nbuf[max] = '\n';    /* reattach our newline if -t opt */
}

/*
 * printbest -- report scan 'b' and, with -o, write its buffer to a file.
 *
 * Unless -q is set this prints the offset of the scan, its fail count and
 * its consecutive fail count, followed (with -d and at least one failure)
 * by the index of every differing byte relative to sfile, eight per line.
 *
 * 'bs' is the number of bytes that were compared.  When -t stripped a
 * newline (tflag > 1) one more byte is passed on to spitfile() so that
 * the re-attached newline is written as well.
 *
 * The counters are converted to unsigned long before printing so that the
 * conversion specifications always agree with the argument types.
 */
void printbest(struct scan *b, unsigned long bs)
{
	unsigned long diffindx;
	const char *mtype = "Maximum";

	if (!qflag) {
		(void)printf("---------------------------------\n");
		(void)printf("Offset: 0x%lx\n", (unsigned long)b->os);
		(void)printf("Number of failures: %lu\n",
			(unsigned long)b->fail);

		/* with -c but no -f only the consecutive count was limited */
		if (!fflag && cflag) {
			mtype = "Minimax";
		}
		(void)printf("%s number of consecutive failures: %lu\n",
			mtype, (unsigned long)b->cfail);

		if (dflag && (b->fail > 0l)) {
			(void)printf(
			"All differences are relative to %s.\n", sfile);
			for (diffindx = 0l; diffindx < b->fail; diffindx++) {
				(void)printf("0x%lx ",
					(unsigned long)(b->diffs)[diffindx]);
				if ((diffindx + 1) % 8 == 0l)
					(void)printf("\n");
			}
			(void)printf("\n");
		}
	}

	if (tflag > 1)
		bs++;                /* include the re-attached newline */
	if (oflag)
		spitfile(b, bs);
}

/*
 * spitfile -- write the first 'bs' bytes of b->buf to an output file.
 *
 * Used when -o is set.  The file name is the user supplied basename
 * 'ofile' followed by ".0x" and the scan's offset in hexadecimal, e.g.
 * "out.0x1f".  Every failure (name generation, open, write, close) is
 * reported on stderr but is not fatal; the function simply returns.
 */
void spitfile(struct scan *b, unsigned long bs)
{
	/*
	 * ".0x" plus one hex digit per four bits of an unsigned long; the
	 * terminating '\0' is already counted by sizeof ".0x".
	 */
	char sufx[sizeof ".0x" + (sizeof(unsigned long) * CHAR_BIT + 3) / 4];
	char *outptfn;            /* complete output file name */
	size_t baselen;           /* length of the basename    */
	int sufxlen;              /* length of the suffix      */
	FILE *fp;

#ifdef WIN32
	const char *mode = "wb";
#else
	const char *mode = "w";
#endif

	sufxlen = snprintf(sufx, sizeof sufx, ".0x%lx", (unsigned long)b->os);
	if (sufxlen < 0 || (size_t)sufxlen >= sizeof sufx) {
		(void)fprintf(stderr,
			"%s: can't generate output file name\n", progname);
		return;
	}

	baselen = strlen(ofile);
	outptfn = malloc(baselen + (size_t)sufxlen + 1);
	if (outptfn == NULL) {
		(void)fprintf(stderr,
			"%s: can't generate output file name\n", progname);
		return;
	}

	memcpy(outptfn, ofile, baselen);
	memcpy(outptfn + baselen, sufx, (size_t)sufxlen + 1); /* +1 to get '\0' */

	fp = fopen(outptfn, mode);
	if (fp == NULL) {
		(void)fprintf(stderr,
			"%s: can't create output file %s\n", progname, outptfn);
	} else {
		if (fwrite(b->buf, bs, 1, fp) < 1) {
			(void)fprintf(stderr,
				"%s: error writing %s\n", progname, outptfn);
		}
		if (fclose(fp) == EOF) {
			(void)fprintf(stderr,
				"%s: error closing %s\n", progname, outptfn);
		}
	}

	free(outptfn);
}

/*
 * salloc -- allocate a scan structure for comparing 'bs' bytes.
 *
 * To keep memory use down the optional members are allocated only when
 * they will be used:
 *   buf   (bs+1 bytes)          only with -o; the extra byte holds the
 *                               newline that getbuf() re-attaches after
 *                               -t stripped it;
 *   diffs (bs zeroed entries)   only with -d.
 * A member that is not allocated is set to NULL.  'bs' may be zero (as
 * for the dummy 'match' scan), in which case neither is allocated.
 *
 * The fail counters and the offset are not initialised here.
 *
 * Any allocation failure is fatal: a diagnostic is written to stderr and
 * the program exits with status -1, so the return value is never NULL.
 */
struct scan *salloc(unsigned long bs)
{
	struct scan *cs;
	unsigned long *df = NULL;
	char *cb = NULL;

	cs = (struct scan *)malloc(sizeof(struct scan));
	if (bs && oflag)
		cb = (char *)malloc(bs + 1);
	if (bs && dflag)
		df = (unsigned long *)calloc(bs, sizeof(unsigned long));

	if (cs == NULL || (bs && oflag && (cb == NULL)) ||
	    (bs && dflag && (df == NULL))) {
		(void)fprintf(stderr,
			"%s: can't malloc\n", progname);
		exit(-1);
	}

	cs->diffs = df;
	cs->buf = cb;

	return cs;
}

/*
 * freescan -- release a scan structure.
 *
 * The difference vector is freed only when -d is set and the output
 * buffer only when -o is set; the structure itself is always freed.
 */
void freescan(struct scan *s)
{
	if (dflag) {
		free(s->diffs);
	}
	if (oflag) {
		free(s->buf);
	}
	free(s);
}

/*
 * usage -- print the usage line on stderr and exit with status -1.
 *
 * The Win32 build rejects -t, so it is the only flag left out of that
 * build's usage line; -q is accepted there and is therefore listed.
 */
void usage(void)
{
#ifdef WIN32
	static const char flags[] = "abdq";
#else
	static const char flags[] = "abtdq";
#endif

	(void)fprintf(stderr,
		"Usage: %s [-%s] [-v vn] [-f fl] [-c cfl] [-s start] "
		"[-i incr] [-o of] bfile sfile\n", progname, flags);
	exit(-1);
}

