/* $Id: wget.c,v 1.7 2002/06/23 20:28:31 richdawe Exp $ */

/*
 *  wget.c - Interface to wget for pakke
 *  Copyright (C) 2001, 2002 by Richard Dawe
 *      
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "common.h"

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <regex.h>

#include <libpakke/util.h>

#include "pakke.h"
#include "platform.h"
#include "wget.h"

/*
 * The "success" return value of the reg*() functions is spelled
 * differently from platform to platform: DJGPP has REG_OKAY; Linux has
 * REG_NOERROR; POSIX says 0.
 *
 * Make sure REG_NOERROR is always available, so that the rest of this
 * file can compare reg*() results against a single name. An existing
 * REG_NOERROR macro is left alone; otherwise REG_OKAY is used if present,
 * falling back to the POSIX value of 0.
 */
#ifndef REG_NOERROR
#ifdef REG_OKAY
#define REG_NOERROR REG_OKAY
#else
#define REG_NOERROR 0
#endif /* REG_OKAY */
#endif /* !REG_NOERROR */

/* Non-zero once wget_init() has completed; wget_init() increments it and
 * wget_uninit() decrements it again. */
static int wget_inited = 0;

/* Path of the wget executable as found by wget_init(). An empty string
 * means that no executable is known - see wget_executable(). */
static char wget[PATH_MAX + 1];

/* Proxy lists handed to wget_init(). Only the pointers are stored, not
 * copies. choose_proxy() treats a NULL entry as the end of a list and
 * a NULL list pointer as "no proxies". */
static char **http_proxies = NULL;
static char **ftp_proxies  = NULL;

/* If there's more than one proxy, choose which one to use on
 * a round-robin basis. These are the indices of the next entry to use
 * in http_proxies and ftp_proxies respectively. */
static int n_http_proxy = 0;
static int n_ftp_proxy  = 0;

/* Command-line options understood by wget.
 * NOTE(review): WGET_OPT_PROXY_ON and WGET_OPT_PROXY_OFF are not
 * referenced anywhere in the visible part of this file. */
static const char WGET_OPT_VERBOSE[]     = "-v";
static const char WGET_OPT_QUIET[]       = "-q";
static const char WGET_OPT_DEST[]        = "-O";
static const char WGET_OPT_PASSIVE_FTP[] = "--passive-ftp";
static const char WGET_OPT_PROXY_ON[]    = "--proxy=on";
static const char WGET_OPT_PROXY_OFF[]   = "--proxy=off";

/* Pattern used by wget_get_listing() to recognise lines of a directory
 * index that contain a link. Compiled in wget_init() and released in
 * wget_uninit(). */
static regex_t list_regex;

/* -------------
 * - wget_init -
 * ------------- */

/* Find the wget executable. */

/* Adaptor with the void(void) signature that atexit() requires;
 * wget_uninit() itself returns an int, which is discarded here. */
static void
wget_uninit_wrapper (void)
{
  (void) wget_uninit();
}

int
wget_init (const char *root, char **use_http_proxies, char **use_ftp_proxies)
{
  platform_class_t  detected_platform_class;
  const char       *wget_env;
  char              pathname[PATH_MAX];
  int               found = 0;

  /* Already initialised? */
  if (wget_inited)
    return(wget_inited);

  memset(wget, 0, sizeof(wget));
  memset(pathname, 0, sizeof(pathname));

  http_proxies = use_http_proxies;
  ftp_proxies  = use_ftp_proxies;

  /* --- Set up regular expressions used in parsing wget output --- */

  /* This isn't a particular intelligent regexp for matching wget's
   * HTML-ised FTP directory listings, but it works. This will need
   * updating if wget's behaviour changes. */
  /* TODO: Does this work with web server directory listings? */
  if (regcomp(&list_regex, "<a href=", REG_EXTENDED|REG_ICASE) != 0)
    return(0);

  /* --- Find wget --- */

  detected_platform_class = detect_platform_class();

  /* Has the user set the WGET environment variable, to point us to
   * the executable? */
  wget_env = getenv("WGET");
  if (!found && (wget_env != NULL)) {
    if (strlen(wget_env) >= PATH_MAX) {
      warnf("WGET environment variable's contents are "
	    "too long to be a filename");
    } else {
      strcpy(wget, wget_env);
      found = 1;
    }
  }

  /* Has the user set the wget option in the configuration file? */
  /* TODO */

  /* Try to find wget in the pakke share directories. */
  if (!found && (detected_platform_class != PLATFORM_CLASS_NONE)) {
    strcpy(pathname, root);
    addforwardslash(pathname);

    switch(detected_platform_class) {
    case PLATFORM_CLASS_DOS:
      strcat(pathname, PAKKE_DOS_PREFIX);
      break;

    case PLATFORM_CLASS_WIN16:
      strcat(pathname, PAKKE_WIN16_PREFIX);
      break;

    case PLATFORM_CLASS_WIN32:
      strcat(pathname, PAKKE_WIN32_PREFIX);
      break;

    default:
      /* TODO: Some kind of error? */
      break;
    }

    addforwardslash(pathname);
    strcat(pathname, "wget/");
    strcat(pathname, "wget.exe");

    if (!access(pathname, X_OK)) {
      strcpy(wget, pathname);
      found = 1;
    }
  }

  /* If we can't find it in the pakke share directory, look in the path. */
  /* TODO */

  /* Set up the exit handler. */
  atexit(wget_uninit_wrapper);

  /* Done */
  wget_inited++;

  return(1);
}

/* ---------------
 * - wget_uninit -
 * --------------- */

/*
 * Undo wget_init(): release the compiled listing regexp and drop the
 * initialisation count. Does nothing when the module is not initialised.
 * Always returns 1.
 */
int
wget_uninit (void)
{
  if (wget_inited) {
    /* Tidy up */
    regfree(&list_regex);
    wget_inited--;
  }

  return(1);
}

/* -------------------
 * - wget_executable -
 * ------------------- */

/* Return the path for the wget executable, or NULL if the stored path
 * is empty, i.e. no executable is known. */

const char *
wget_executable (void)
{
  return((wget[0] != '\0') ? wget : NULL);
}

/* ----------------
 * - choose_proxy -
 * ---------------- */

/*
 * Choose an HTTP/FTP proxy on a round-robin basis (1st, then 2nd, etc.).
 *
 * n_proxy - in/out: index of the entry to use this time; on return it
 *           holds the index to use next time. It is left untouched when
 *           there is no proxy to choose.
 * proxies - NULL-terminated list of proxy URLs, or NULL.
 *
 * Returns the URL for the proxy, or NULL if there is no list or the list
 * is empty.
 *
 * An index outside the list (negative, or at/after the terminating NULL,
 * e.g. a stale index from a previously used, longer list) restarts the
 * cycle at the first entry.
 */

static char *
choose_proxy (int *n_proxy, char **proxies)
{
  int count;

  /* If we have no list or an empty list, just return. */
  if (!proxies || !proxies[0])
    return(NULL);

  /* Find the length of the list, so that the index can be validated
   * without ever reading beyond the terminating NULL. */
  for (count = 0; proxies[count] != NULL; count++)
    ;

  if ((*n_proxy < 0) || (*n_proxy >= count))
    *n_proxy = 0;

  return(proxies[(*n_proxy)++]);
}

/* ------------
 * - wget_get -
 * ------------ */

/*
 * Download url to the file dest by running the wget executable through
 * system(). Always use passive FTP, to avoid problems with firewalls.
 *
 * url       - URL to fetch; only the "http" and "ftp" schemes are
 *             accepted (compared case-insensitively).
 * dest      - name of the file that wget should write to.
 * verbosity - V_NORMAL runs wget quietly; any other value runs it
 *             verbosely and reports the command line first.
 *
 * Returns 1 if wget was run and system() reported a zero status.
 * Returns 0 otherwise: url or dest is NULL, no wget executable is known,
 * the URL's scheme cannot be extracted or is unsupported, memory is
 * exhausted, or wget could not be run or reported failure.
 */

int
wget_get (const char *url, const char *dest, const int verbosity)
{
  char         scheme[5] = ""; /* "http" or "ftp" */
  const char  *exe;
  const char  *noise_opt;
  char        *proxy = NULL;
  char        *buf;
  size_t       buflen;
  int          ret;

  /* Nothing to do */
  if ((url == NULL) || (dest == NULL))
    return(0);

  /* No wget => no get */
  exe = wget_executable();
  if (exe == NULL)
    return(0);

  /* Choose a proxy, based on the URL. */
  if (!get_url_component(URL_COMP_SCHEME, url, scheme, sizeof(scheme)))
    return(0);

  if (strcasecmp(scheme, "http") == 0)
    proxy = choose_proxy(&n_http_proxy, http_proxies);
  else if (strcasecmp(scheme, "ftp") == 0)
    proxy = choose_proxy(&n_ftp_proxy, ftp_proxies);
  else
    return(0);

  /* NOTE(review): the chosen proxy is not passed on to wget yet - neither
   * on the command line nor through the environment. The choice is still
   * made, so that the round-robin position advances as before. */
  (void) proxy;

  noise_opt = (verbosity == V_NORMAL) ? WGET_OPT_QUIET : WGET_OPT_VERBOSE;

  /* Calculate command-line length: five words followed by a space each,
   * then the destination and the terminating nul. */
  buflen  = strlen(exe) + 1;
  buflen += strlen(WGET_OPT_PASSIVE_FTP) + 1;
  buflen += strlen(noise_opt) + 1;
  buflen += strlen(url) + 1;
  buflen += strlen(WGET_OPT_DEST) + 1;
  buflen += strlen(dest) + 1 /* nul */;

  /* Allocate & build command-line */
  buf = malloc(buflen);
  if (buf == NULL)
    return(0);

  /* NOTE(review): url and dest are inserted unquoted, so characters that
   * are special to the command processor (spaces, '&', ';', ...) will be
   * interpreted by it. Callers must only pass trusted values. */
  strcpy(buf, exe);
  strcat(buf, " ");
  strcat(buf, WGET_OPT_PASSIVE_FTP);
  strcat(buf, " ");
  strcat(buf, noise_opt);
  strcat(buf, " ");
  strcat(buf, url);
  strcat(buf, " ");
  strcat(buf, WGET_OPT_DEST);
  strcat(buf, " ");
  strcat(buf, dest);

  /* Run wget */
  if (verbosity != V_NORMAL)
    infof("Invoking wget: \"%s\"\n", buf);

  ret = system(buf);

  /* Tidy up */
  free(buf);

  /* A zero status means that the command ran and exited successfully;
   * anything else (including -1, "could not be run") is a failure. */
  return((ret == 0) ? 1 : 0);
}

/* --------------------
 * - wget_get_listing -
 * -------------------- */

/*
 * Retrieve the directory index at url with wget and extract the link
 * text from it.
 *
 * The index is downloaded into a temporary file, which is removed again
 * before returning. Each line that matches list_regex contributes one
 * entry: the text between the first '>' at or after the match and the
 * next '<'. Matching lines without both delimiters are skipped, and only
 * the first link on a line is used.
 *
 * url       - URL of the directory to list.
 * verbosity - passed through to wget_get().
 *
 * Returns a NULL-terminated array of strings, or NULL if url is NULL, no
 * wget executable is known, the index could not be fetched or read,
 * memory ran out before any entry was stored, or the index contained no
 * links. The array and its strings are allocated with malloc()/strdup()
 * and are not retained here, so releasing them is up to the caller. If
 * memory runs out part-way through, the entries collected so far are
 * returned.
 */
char **
wget_get_listing (const char *url, const int verbosity)
{
  char        **list = NULL;
  char          list_filename[L_tmpnam];
  FILE         *list_file = NULL;
  struct stat   sbuf;
  char         *buf = NULL;
  size_t        buflen, bufspc;
  char         *p = NULL, *q = NULL;
  int           n_lines = 0, i;
  regmatch_t    regmatch;
  int           ret;

  /* Nothing to do */
  if (url == NULL)
    return(NULL);

  /* No wget => no get */
  if (wget_executable() == NULL)
    return(NULL);

  /* Get a temporary file name for the listing file.
   * NOTE(review): tmpnam() only generates a name; the file itself is
   * created later, by wget, so another process could claim the name in
   * between. */
  if (tmpnam(list_filename) == NULL)
    return(NULL);

  /* Retrieve the directory index. The result is deliberately not relied
   * upon: whether there is anything to parse is decided by looking at
   * the file itself. */
  (void) wget_get(url, list_filename, verbosity);

  if (stat(list_filename, &sbuf) != 0)
    goto done;

  /* Allocate a buffer large enough to hold the index's text. */
  bufspc = buflen = (size_t) sbuf.st_size + 1 /* to reach EOF */ + 1 /* nul */;

  p = buf = malloc(buflen);
  if (buf == NULL)
    goto done;

  memset(buf, 0, buflen);

  /* Read in the directory index. */
  list_file = fopen(list_filename, "rt");
  if (list_file == NULL)
    goto done;

  /* Stop once only the nul would fit: fgets() could then no longer
   * consume any input, and if the file has grown since stat() the loop
   * would otherwise never see EOF. */
  for (n_lines = 0; bufspc > 1; n_lines++) {
    /* fgets() takes an int, so don't hand it more than INT_MAX. */
    int chunk = (bufspc > (size_t) INT_MAX) ? INT_MAX : (int) bufspc;

    if (fgets(p, chunk, list_file) == NULL)
      break;

    bufspc -= strlen(p);
    p      += strlen(p);
  }

  fclose(list_file), list_file = NULL;

  /* Allocate a list for the filenames. This allocates too many lines,
   * but we assume that the overhead of ignored lines isn't that great. */
  list = malloc(((size_t) n_lines + 1) * sizeof(char *));
  if (list == NULL)
    goto done;

  for (i = 0; i <= n_lines; i++) { list[i] = NULL; }

  /* Now parse the file names from the index. */
  for (p = buf, i = 0; (p != NULL) && (i < n_lines); p = q) {
    char *text, *text_end;

    /* Separate this line from the next */
    q = strchr(p, '\n');
    if (q != NULL) {
      *q = '\0';
      q++;
    }

    ret = regexec(&list_regex, p, 1, &regmatch, 0);
    if (ret != REG_NOERROR) {
      /* REG_NOMATCH: not a link line. Any other failure is treated the
       * same way - the line is ignored. */
      continue;
    }

    /* The link text sits between the end of the opening tag and the
     * start of the next one. Ignore lines where either is missing. */
    text = strchr(p + regmatch.rm_so, '>');
    if (text == NULL)
      continue;
    text++;

    text_end = strchr(text, '<');
    if (text_end == NULL)
      continue;
    *text_end = '\0';

    /* Make a copy of the string and add it to the list. */
    list[i] = strdup(text);
    if (list[i] == NULL) {
      /* Out of memory: stop here, so that the list has no hole that
       * would hide (and leak) later entries. */
      break;
    }

    i++;
  }

  /* If there are no entries in list, just free it. */
  if (list[0] == NULL) {
    free(list);
    list = NULL;
  }

  /* Tidy up */
done:
  free(buf);
  remove(list_filename);

  return(list);
}

#ifdef TEST
/*
 * Manual test driver, built only when TEST is defined: initialise the
 * module using the DJDIR environment variable as the root directory,
 * fetch the listing of a fixed FTP directory and print its entries.
 */
int
main (int argc, char *argv[])
{
  const char  *djdir;
  char       **list;
  int          i;

  (void) argc;
  (void) argv;

  djdir = getenv("DJDIR");
  if (djdir == NULL) {
    fprintf(stderr, "DJDIR is not set\n");
    return(EXIT_FAILURE);
  }

  if (!wget_init(djdir, NULL, NULL)) {
    fprintf(stderr, "wget_init() failed\n");
    return(EXIT_FAILURE);
  }

  list = wget_get_listing("ftp://iolanthe/pub/", V_VERBOSE);

  if (list) {
    for (i = 0; list[i]; i++) {
      printf("%d: %s\n", i, list[i]);
      free(list[i]);
    }

    free(list);
  }

  if (!wget_uninit()) {
    fprintf(stderr, "wget_uninit() failed\n");
    return(EXIT_FAILURE);
  }

  return(EXIT_SUCCESS);
}
#endif
