/* 
 * Copyright (C) 2000-2003 by Oswald Buddenhagen <puf@ossi.cjb.net>
 * based on puf 0.1.x (C) 1999,2000 by Anders Gavare <gavare@hotmail.com>
 *
 * You may modify and distribute this code under the terms of the GPL.
 * There is NO WARRANTY of any kind. See COPYING for details.
 *
 * url.c - parse and manage urls
 *
 */

#include "puf.h"

/*  when nonzero, parse_add_url() refuses referenced urls whose host is not
    already available (and ready) in the fast host lookup  */
int economize_dns;
/*  table of all known urls; only accessed through the h_find/h_add macros  */
static url_t *urllist;
/*  counter handed to h_find/h_add together with urllist  */
static int real_num_urls;
/*  head of the singly linked list of proxies built by parse_add_proxy()  */
static proxy_t *proxylist;

/*
 *  Calculate the hash code for the given url and look it up in urllist
 *  to eliminate duplicates.
 *
 *  path, len  local part of the url (length given explicitly)
 *  hinfo      host info of the url; compared by pointer identity
 *  port       port of the url
 *  hashp      receives the hash code, but only if the url was NOT found
 *
 *  Returns 1 if an entry with the same host info, port and local part
 *  already exists (*hashp is left untouched then), 0 otherwise.
 */
int 
find_url(const char *path, int len, hinfo_t *hinfo, u_short port, int *hashp)
{
    /*  mixes the path hash with the host info pointer and the port; the
	uhash macro in do_add_url() builds its value from the same parts  */
    int hash = calc_nhash(path, len) ^ (int)hinfo ^ port;

/*  ucmp: expression that is true if table entry u denotes the same url  */
#define ucmp(u) u->host->info == hinfo && u->port == port && \
	    !memcmp(u->local_part, path, len) && u->local_part[len] == '\0'
/*  ufnd: statements for the "found" case. note that the return is not
    part of the else branch, despite its indentation; it is always reached.
    NOTE(review): h_find is defined elsewhere; it presumably runs ufnd for
    the first entry that has a matching hash and satisfies ucmp - confirm  */
#define ufnd(u) \
	  if (port != 80) \
	    dbg(URL, ("http://%s:%d/%.*s already in chain, not adding\n", \
		      hinfo->name, port, len, path)); \
	  else \
	    dbg(URL, ("http://%s/%.*s already in chain, not adding\n", \
		      hinfo->name, len, path)); \
	    return 1;

    h_find(urllist, real_num_urls, url_t, hash, ucmp, ufnd);
    /*  not found: hand the hash to the caller for a later add_url()  */
    *hashp = hash;
    return 0;
}

/*
 *  Decide whether host:port is an acceptable target for a reference
 *  found in referer.
 *
 *  referer  url the reference was found in; 0 means "no restriction"
 *  host     host of the referenced url (host->info must be valid)
 *  port     port of the referenced url
 *  isreq    nonzero: judge by the follow_src option and the rdom_list,
 *           zero: judge by follow_href and the ldom_list
 *
 *  Returns 1 if acceptable, otherwise 0.
 */
int
same_host(url_t *referer, host_t *host, u_short port, int isreq)
{
    ptrarr_t *domains;
    const char *hname, *entry;
    int mode, hlen, elen;
    unsigned i;

    if (!referer)
	return 1;

    mode = isreq ? referer->parm->opt->follow_src
		 : referer->parm->opt->follow_href;

    /*  modes above HOST_RECURSIVE do not restrict the host at all  */
    if (mode > HOST_RECURSIVE)
	return 1;

    /*  same host info and same port are always fine  */
    if (host->info == referer->host->info && port == referer->port)
	return 1;

    /*  only HOST_RECURSIVE consults the list of additional domains  */
    if (mode != HOST_RECURSIVE)
	return 0;

    domains = isreq ? referer->parm->opt->rdom_list
		    : referer->parm->opt->ldom_list;
    if (!domains->nents)
	return 0;

    hname = host->info->name;
    hlen = strlen(hname);
    for (i = 0; i < domains->nents; i++) {
	entry = ((char **)domains->ents)[i];
	if (entry[0] == '.') {
	    /*  ".dom.ain": case-insensitive suffix match; the host name
		must be strictly longer than the suffix  */
	    elen = strlen(entry);
	    if (hlen > elen &&
		!strncasecmp(hname + hlen - elen, entry, elen))
		return 1;
	} else if (patternMatch(hname, hlen, entry)) {
	    /*  anything else is a pattern for the whole host name  */
	    return 1;
	}
    }
    return 0;
}

/*
 *  Decide whether the url with local part path[0..len) may be followed
 *  from referer with respect to the directory restrictions.
 *
 *  path, len  local part of the candidate url
 *  referer    url the reference was found in (must not be 0)
 *  is_req     nonzero: judge by the follow_src option,
 *             zero: judge by follow_href
 *
 *  Returns 1 if acceptable, otherwise 0 (with a debug message).
 */
int 
same_dir(const char *path, int len, url_t *referer, int is_req)
{
    const char *refpart = referer->local_part;
    int pos = referer->disp_pathoff;
    int mode, limit, depth, differs;

    mode = is_req ? referer->parm->opt->follow_src
		  : referer->parm->opt->follow_href;
    if (mode >= HOST_RECURSIVE)
	return 1;

    /*  the candidate must live in the same top-level disposition
	directory as the referer (disp_pathoff == -1: there is none)  */
    if (pos != -1) {
	differs = len < pos;
	if (!differs) {
	    /*  advance to the slash that ends the top-level directory  */
	    while (refpart[pos] != '/')
		pos++;
	    differs = len < pos ||
		      memcmp(path, refpart, pos) ||
		      (len > pos && path[pos] != '/');
	}
	if (differs) {
	    dbg(URL, ("not added '/%.*s' (different top-dir)\n", len, path));
	    return 0;
	}
    }

    /*  determine how many directory levels below that are allowed  */
    if (mode == SAMEDIR_RECURSIVE)
	limit = 0;
    else if (is_req)
	return 1;
    else if ((limit = referer->parm->opt->max_depth) < 0)
	return 1;

    /*  count the slashes behind the top-level directory  */
    depth = 0;
    for (pos++; pos < len; pos++) {
	if (path[pos] != '/')
	    continue;
	if (++depth > limit) {
	    dbg(URL, 
		("not added '/%.*s' (directories to deeply nested)\n",
		 len, path));
	    return 0;
	}
    }

    return 1;
}


/*
 *  Format url u as "http://[auth@]host[:port]/local_part" into buf.
 *
 *  buf, bufl  destination buffer and its size (as passed to the lcat_*
 *             macros, which do the actual appending)
 *  u          url to print
 *  auth       nonzero: include the decoded http authentication, if any
 *
 *  Returns the running length kept by the lcat_* macros minus one for the
 *  terminating NUL that is appended last.
 */
int
print_url(char *buf, int bufl, url_t *u, int auth)
{
    char abuf[SHORTSTR];	/* scratch for decoded auth and port text */
    int dl = 0;

    lcat_str(buf, bufl, dl, "http://");

    if (auth && u->parm->http_auth) {
	lcat_mem(buf, bufl, dl, decode_auth(abuf, u->parm->http_auth), abuf);
	lcat_chr(buf, bufl, dl, '@');
    }

    lcat_str(buf, bufl, dl, u->host->name);

    /*  the default port is not printed  */
    if (u->port != 80) {
	lcat_mem(buf, bufl, dl, sprintf(abuf, ":%d", u->port), abuf);
    }

    lcat_chr(buf, bufl, dl, '/');
    lcat_str(buf, bufl, dl, u->local_part);
    lcat_chr(buf, bufl, dl, 0);

    return dl - 1;
}


/*
 *  Report a url parsing problem.
 *
 *  lev        message level handed to prx()/prxu()
 *  err        message text; must contain one "%.*s" for the url
 *  srct       description of where the url came from
 *  url, len   the offending url text
 *  ref        referring url, or 0 for a user-supplied url
 *
 *  The final format is assembled in a local buffer of 128 bytes, so
 *  longer err texts are truncated by snprintf().
 */
static void
pu_err(int lev, const char *err, const char *srct, const char *url, int len,
       url_t *ref)
{
    char fmt[128];

    if (!ref) {
	/*  no referer: only name the source  */
	snprintf(fmt, sizeof(fmt), "%s (from %%s)\n", err);
	prx(lev, fmt, len, url, srct);
	return;
    }

    /*  $u is handled by prxu(), which gets the referer passed  */
    snprintf(fmt, sizeof(fmt), "%s (%%s from $u)\n", err);
    prxu(lev, ref, fmt, len, url, srct);
}

/*
 *  Copy len bytes from src to dst, converting them to lower case, and
 *  append a terminating NUL.
 *
 *  dst  destination; must have room for len + 1 bytes
 *  src  source; need not be NUL-terminated
 *  len  number of bytes to convert
 */
static void
cplbuf(char *dst, const char *src, size_t len)
{
    size_t p;

    /*  tolower() is only defined for EOF and values representable as
	unsigned char, so plain (possibly negative) chars must be
	converted first  */
    for (p = 0; p < len; p++)
	dst[p] = (char)tolower((unsigned char)src[p]);
    dst[p] = '\0';
}

/*
 *  Append alen bytes starting at aoff to the local part collected in pu.
 *
 *  pu     parsed url; pu->lpart must point to a writable buffer of
 *         SHORTSTR bytes, pu->lpartlen is the fill level and is advanced
 *  aoff   start of the text to append
 *  alen   number of bytes to append
 *  aquot  nonzero: replace every space by "%20" while copying
 *
 *  Returns 1 on success and 0 if the buffer would be filled up to
 *  SHORTSTR or beyond. In quoting mode the bytes copied before running
 *  out of space stay in the buffer.
 */
static int
flushseg(purl_t *pu, const char *aoff, int alen, int aquot)
{
    char *out = (char *)pu->lpart;
    int need;
    char c;

    if (!aquot) {
	/*  plain copy of the whole run  */
	if (pu->lpartlen + alen >= SHORTSTR)
	    return 0;
	memcpy(out + pu->lpartlen, aoff, alen);
	pu->lpartlen += alen;
	return 1;
    }

    while (alen > 0) {
	c = *aoff;
	need = (c == ' ') ? 3 : 1;
	if (pu->lpartlen + need >= SHORTSTR)
	    return 0;
	if (c == ' ') {
	    out[pu->lpartlen++] = '%';
	    out[pu->lpartlen++] = '2';
	    out[pu->lpartlen++] = '0';
	} else {
	    out[pu->lpartlen++] = c;
	}
	aoff++;
	alen--;
    }
    return 1;
}

/*  result codes of parse_url()  */
#define PU_OK		0	/* url was parsed successfully */
#define PU_DROP		1	/* url is a CGI reference and no_cgi was set */
#define PU_UNK		2	/* url uses an unknown protocol */
#define PU_BAD		3	/* url is malformed (no host, bad port, ...) */
#define PU_MYBAD	PU_BAD	/* url exceeds a limit of this parser */

#define MAXSEG		100 /* maximum number of path segments in url */

/*
 *  Split a url into protocol, authentication, host, port and local part.
 *
 *  srct      description of the url's source; only used in messages
 *  url, len  the url text; all scanning is bounded by len
 *  ref       url this one is relative to, or 0 for a user-supplied url.
 *            without ref a missing protocol is accepted and the text is
 *            taken as "host[:port]/path"
 *  no_cgi    nonzero: urls with a '?' or a "cgi-bin" path segment are
 *            dropped
 *  pu        receives the result:
 *              host              ref->host for references without an
 *                                authority part, otherwise 0
 *              port              explicit port, 80, or ref->port
 *              auth, authlen     "user:pass" text inside url (only set
 *                                when an authority part was parsed)
 *              lpart, lpartlen   local part; points into url, into
 *                                ref->local_part or to lpbuf
 *              pathlen           length of the directory prefix of lpart
 *  lpbuf     buffer of SHORTSTR bytes, used if the local part has to be
 *            assembled (relative path, dropped segments, quoted spaces)
 *  hostbuf   buffer of SHORTSTR bytes, receives the lower-cased host name
 *  hostlen   receives the length of the host name
 *            (hostbuf and *hostlen are only written when an authority
 *            part was parsed, i.e. when pu->host ends up 0)
 *
 *  Returns PU_OK, PU_DROP, PU_UNK, PU_BAD or PU_MYBAD.
 */
static int 
parse_url(const char *srct, const char *url, int len, url_t *ref, int no_cgi,
	  purl_t *pu, char *lpbuf, char *hostbuf, int *hostlen)
{
    int p, noho, ho, eho, po, sp, et;
    int havebad, oplen, nsegs, aoff, alen, aquot, cseg, is_cgi;
    int segos[MAXSEG]; /* offsets */
    int segls[MAXSEG]; /* lengths */
    int segqs[MAXSEG]; /* quoting needed */

    /*  get protocol  */
    for (p = 0; ; p++) {
	if (p >= len) {
	  brken:		/*  no protocol. impossible in good url.  */
	    if (!ref) {		/*  user-supplied  */
	      rhttp:
/*		pu->proto = PR_HTTP;*/
		p = 0;
		break;
	    } else {		/*  implies reference  */
		pu->host = ref->host;
		pu->port = ref->port;
		if (len >= 1 && url[0] == '/') {
		    if (len >= 2 && url[1] == '/') {
			/*  "//host/path": new authority, same protocol  */
			p = 2;
/*			pu->proto = ref->proto;*/
			break;
		    } else {
			/*  "/path": absolute path on the same host  */
			p = 1;
			goto getflocal;
		    }
		} else {
		    /*  "path": relative to the directory of ref  */
		    p = 0;
		    oplen = ref->path_len;
		    goto getlocal;
		}
	    }
	}
	if (url[p] == ':') {
	    /*  both characters behind the colon must lie inside the url
		before they may be inspected, hence p + 3  */
	    noho = len < p + 3 || url[p + 1] != '/' || url[p + 2] != '/';
	    /*  user-supplied "host:port": the colon is no protocol end  */
	    if (noho && !ref)
		goto rhttp;
	    if (len < 7 || strncasecmp(url, "http:", 5)) {
		pu_err(ref ? WRN : ERR, "unknown protocol in %.*s",
		       srct, url, len, ref);
		return PU_UNK;
	    }
	    if (noho) {
		pu_err(WRN, "no host in %.*s", srct, url, len, ref);
		return PU_BAD;
	    }
/*	    pu->proto = PR_HTTP;*/
	    p += 3;
	    break;
	}
	/*  ctype functions need an unsigned char value  */
	if (!isalpha((unsigned char)url[p]))
	    goto brken;
    }

  gethost:
    pu->host = 0;
    /*  get host & port  */
    pu->auth = 0;
    pu->authlen = 0;
  reho:
    /*  ho: start of host, po: position of the last colon (0: none),
	eho: end of the authority part  */
    for (ho = p, po = 0; p < len; p++) {
	if (url[p] == '/') {
	    eho = p++;
	    goto halo;
	} else if (url[p] == ':')
	    po = p;
	else if (url[p] == '@') {
	    /*  what was scanned so far was the authentication; restart
		the host scan behind the '@'  */
	    pu->auth = url + ho;
	    pu->authlen = p++ - ho;
	    goto reho;
	}
    }
    eho = p;
  halo:

    if (po) {
	if (!(pu->port = atoi(url + po + 1))) {
	    pu_err(ref ? WRN : ERR, "invalid port in %.*s", srct, url, len, ref);
	    return PU_BAD;
	}
    } else {
	pu->port = 80;
	po = eho;
    }

    /*  from here on po marks the end of the host name  */
    if (po == ho) {
	pu_err(ref ? WRN : ERR, "no hostname in %.*s", srct, url, len, ref);
	return PU_BAD;
    }
    if (po - ho >= SHORTSTR) {
	pu_err(ref ? WRN : ERR, "too long hostname in %.*s", srct, url, len, ref);
	return PU_MYBAD;
    }
    *hostlen = po - ho;
    cplbuf(hostbuf, url + ho, po - ho);

  getflocal:
    /*  oplen: number of bytes of ref->local_part the result starts with  */
    oplen = 0;
  getlocal:
    /*  split the local part into segments; a segment includes its
	trailing slash. "" and "." are dropped, ".." removes the previous
	segment, a '?' turns the rest of the url into the last segment  */
    nsegs = 0;
    is_cgi = 0;
    for (; p < len; ) {
	havebad = 0;
	for (sp = p; p < len; p++) {
	    if (url[p] == '/') {
		et = p++;
		goto gotsl;
	    }
	    if (url[p] == '?') {
		if (no_cgi) {
		    pu_err(NFO, "%.*s is CGI", srct, url, len, ref);
		    return PU_DROP;
		}
		if (nsegs == MAXSEG) {
		    pu_err(ref ? WRN : ERR,
			   "local part of %.*s has too many segments",
			   srct, url, len, ref);
		    return PU_MYBAD;
		}
		segos[nsegs] = sp;
		segls[nsegs] = len - sp;
		segqs[nsegs] = havebad ||
			       !!memchr(url + p + 1, ' ', len - p - 1);
		nsegs++;
		is_cgi = 1;
		goto gotlocal;
	    }
	    if (url[p] == ' ')
		havebad = 1;
	}
	et = p;
      gotsl:
	/*  sp..et is the segment text without the slash  */
	if ((et - sp != 0) && ((et - sp != 1) || url[sp] != '.')) {
	    if ((et - sp == 2) && url[sp] == '.' && url[sp + 1] == '.') {
		if (nsegs) {
		    nsegs--;
		    continue;
		} else if (oplen) {
		    /*  strip the last directory of the reference path  */
		    while (--oplen > 0 && ref->local_part[oplen - 1] != '/');
		    continue;
		} else if (pu->host || *hostlen) {
		    if (len > p + 1 && url[p] == '/') { /*  ..//auth/  */
			p++;
			goto gethost;
		    }
		}
		pu_err(ref ? WRN : ERR, "%.*s points below root",
		       srct, url, len, ref);
		return PU_BAD;
	    } else {
		if (no_cgi && et - sp == 7 && !memcmp(url + sp, "cgi-bin", 7)) {
		    pu_err(NFO, "%.*s is CGI", srct, url, len, ref);
		    return PU_DROP;
		}
		if (nsegs == MAXSEG) {
		    pu_err(ref ? WRN : ERR,
			   "local part of %.*s has too many segments",
			   srct, url, len, ref);
		    return PU_MYBAD;
		}
		segos[nsegs] = sp;
		segls[nsegs] = p - sp;
		segqs[nsegs] = havebad;
		nsegs++;
	    }
	}
    }
  gotlocal:
    pu->pathlen = pu->lpartlen = oplen;
    if (!nsegs)
	pu->lpart = ref ? ref->local_part : 0;
    else {
	pu->lpart = lpbuf;
	if (oplen)
	    memcpy(lpbuf, ref->local_part, oplen);
	/*  merge adjacent segments with equal quoting into runs
	    (aoff/alen/aquot) and flush each finished run to lpbuf.
	    if something ends up in lpbuf, the last segment is kept in a
	    run of its own unless it is a directory, so that pathlen can
	    be taken before it is appended  */
	aoff = segos[0];
	alen = segls[0];
	aquot = segqs[0];
	for (cseg = 1; cseg < nsegs; cseg++) {
	    if ((segos[cseg] != (aoff + alen)) || (segqs[cseg] != aquot) ||
	        ((aquot || pu->lpartlen) && (cseg == (nsegs - 1)) &&
	         (is_cgi || (url[segos[cseg] + segls[cseg] - 1] != '/')))) {
		if (!flushseg(pu, url + aoff, alen, aquot)) {
		    pu_err(ref ? WRN : ERR, "too long local part in %.*s",
			   srct, url, len, ref);
		    return PU_MYBAD;
		}
		aoff = segos[cseg];
		alen = segls[cseg];
		aquot = segqs[cseg];
	    } else
		alen += segls[cseg];
	}
	if (!pu->lpartlen && !aquot) {
	    /*  nothing was copied and the single remaining run needs no
		quoting: point into url instead of copying  */
	    if (alen >= SHORTSTR) {
		pu_err(ref ? WRN : ERR, "too long local part in %.*s",
		       srct, url, len, ref);
		return PU_MYBAD;
	    }
	    pu->lpart = url + aoff;
	    pu->lpartlen = alen;
	    pu->pathlen = (!is_cgi && (url[aoff + alen - 1] == '/')) ?
			  alen : (alen - segls[nsegs - 1]);
	} else {
	    pu->pathlen = pu->lpartlen;
	    if (!flushseg(pu, url + aoff, alen, aquot)) {
		pu_err(ref ? WRN : ERR, "too long local part in %.*s",
		       srct, url, len, ref);
		return PU_MYBAD;
	    }
	    if (!is_cgi && (url[aoff + alen - 1] == '/'))
		pu->pathlen = pu->lpartlen;
	}
    }

/*    pu->srct = srct;*/

    /*  pu->host is only set on the paths that require ref  */
    if (pu->host)
	dbgu(URL, (ref, "$u ~ '%.*s' => [ '%s' @ %s : %i ] / '%.*s' '%.*s' "
		   "(%s copy)\n",
		   len, url, ref->parm->http_auth ? ref->parm->http_auth : "",
		   pu->host->name, pu->port,
		   pu->pathlen, pu->lpart,
		   pu->lpartlen - pu->pathlen, pu->lpart + pu->pathlen,
		   pu->lpart == lpbuf ? "with" : "no"));
    else if (ref)
	dbgu(URL, (ref, "$u ~ '%.*s' => '%.*s' @ %s : %i / '%.*s' '%.*s' "
		   "(%s copy)\n",
		   len, url, pu->authlen, pu->auth ? pu->auth : "",
		   hostbuf, pu->port,
		   pu->pathlen, pu->lpart,
		   pu->lpartlen - pu->pathlen, pu->lpart + pu->pathlen,
		   pu->lpart == lpbuf ? "with" : "no"));
    else
	dbg(URL, ("'%.*s' => '%.*s' @ %s : %i / '%.*s' '%.*s' (%s copy)\n",
		  len, url, pu->authlen, pu->auth ? pu->auth : "",
		  hostbuf, pu->port,
		  pu->pathlen, pu->lpart,
		  pu->lpartlen - pu->pathlen, pu->lpart + pu->pathlen,
		  pu->lpart == lpbuf ? "with" : "no"));

    return PU_OK;
}


/*  parse proxy url, return proxy structure  */
proxy_t *
parse_add_proxy(const char *srct, const char *proxy)
{
    char *pt, *authbuf;
    proxy_t *prox;
    host_t *host;
    whost_t *wh;
    int hnl, have_auth, len_auth;
    purl_t pu[1];
    char hostbuf[SHORTSTR], lpbuf[SHORTSTR];

    if (parse_url(srct, proxy, strlen(proxy), 0, 0,
		  pu, lpbuf, hostbuf, &hnl) != PU_OK)
	return 0;

    if (pu->auth) {
	len_auth = len_enc_auth(pu->authlen);
	if (!(authbuf = mmalloc(len_auth)))
	    return 0;
	encode_auth(authbuf, pu->auth, pu->authlen);
	have_auth = 1;
    } else {
	len_auth = 0;
	authbuf = 0;
	have_auth = 0;
    }

    for (prox = proxylist; prox; prox = prox->next)
	if (!memcmp(hostbuf, prox->host->name, hnl) &&
	      !prox->host->name[hnl] &&
	    prox->port == pu->port &&
	    !memcmp(pu->lpart, prox->cgi_path, pu->lpartlen) &&
	      !prox->cgi_path[pu->lpartlen] &&
	    prox->have_auth == have_auth &&
	    (!have_auth ||
	     !memcmp(prox->cgi_path + pu->lpartlen + 1, authbuf, len_auth)))
	    goto out;

    if ((host = host_lookup_fast(hostbuf, hnl)) && host->ready) {
	if (!host->info)
	    goto out;
    }

    if (!(prox = mmalloc(sizeof(*prox) + pu->lpartlen + 1 + 
			 (pu->authlen ? len_enc_auth(pu->authlen) : 0))))
	goto out;

    memcpy(prox->cgi_path, pu->lpart, pu->lpartlen);
    pt = prox->cgi_path + pu->lpartlen;
    *pt++ = '\0';
    memcpy(pt, authbuf, len_auth);

    prox->have_auth = have_auth;

    prox->port = pu->port;

    if (host) {
	prox->host = host;
	if (!host->ready) {
	    wh = (whost_t *)host->info;
	    wh->num_proxies++;
	    waiting_proxies++;
	}
    } else {
	if (!(wh = host_lookup_full(hostbuf, hnl))) {
	    free(prox);
	    prox = 0;
	    goto out;
	}
	prox->host = wh->host;
	wh->num_proxies++;
	waiting_proxies++;
    }

    prox->next = proxylist;
    proxylist = prox;

  out:
    if (pu->auth)
	free(authbuf);

    return prox;
}


/*
 *  Run a url through the filter list of parm.
 *
 *  path, len  local part of the url
 *  fp         offset of the file name inside path; fp == len means that
 *             the url names a directory, in which case the index file
 *             name (option or DEFAULT_INDEX_FILE_NAME) is tested instead
 *  mtype,mtl  mime type and its length, or 0 if not known yet
 *  parm       url parameters holding the filter list
 *
 *  Returns 1 if there are no filters, otherwise the acc value of the
 *  first matching filter. Returns 2 if a mime type filter is reached
 *  while mtype is 0. If nothing matches, the negated acc value of the
 *  filter preceding the final loop position is returned.
 */
int
test_pat(const char *path, int len, int fp,
	 const char *mtype, int mtl, url_parm_t *parm)
{
    ptrarr_t *filters = parm->opt->filter_list;
    filter_t *flt;
    const char *pat;
    unsigned idx;
    int plen;

    if (!filters->nents)
	return 1;

    if (len == fp) {
	path = parm->opt->index_filename ?
	       parm->opt->index_filename : DEFAULT_INDEX_FILE_NAME;
	len = strlen(path);
	fp = 0;
    }

    /*  with a mime type given, the scan starts at entry spare - 1  */
    idx = mtype ? filters->spare - 1 : 0;
    while (idx < filters->nents) {
	flt = ((filter_t **)filters->ents)[idx];
	pat = flt->data;
	if (flt->type) {
	    /*  mime type filter  */
	    if (!mtype)
		return 2;
	    if (patternMatch(mtype, mtl, pat))
		return flt->acc;
	} else if (flt->pat) {
	    /*  pattern for the file name  */
	    if (patternMatch(path + fp, len - fp, pat))
		return flt->acc;
	} else {
	    /*  file name extension: the name must be longer than the
		extension and have a dot right in front of it  */
	    plen = strlen(pat);
	    if (len - fp > plen &&
		path[len - plen - 1] == '.' &&
		!strncasecmp(path + len - plen, pat, plen))
		return flt->acc;
	}
	idx++;
    }
    return !((filter_t **)filters->ents)[idx - 1]->acc;
}

/*
 *  Append a "Link: http://...\n" or "Requisite: http://...\n" line for
 *  the parsed url pu to the header buffer of au.
 *
 *  au             url being fetched; headers/hdrslen/hdrssiz are updated
 *  pu             parsed url to print
 *  hostname, hnl  host name text, used if pu->host is not set
 *  isreq          nonzero: "Requisite:", zero: "Link:"
 *
 *  Lines whose length exceeds SHORTSTR are dropped silently, as are
 *  lines for which the header buffer cannot be enlarged.
 */
static void
print_purl(aurl_t *au, purl_t *pu, const char *hostname, int hnl, int isreq)
{
    char buf[SHORTSTR], abuf[32];
    char *grown;
    int dl = 0, want;

    lcat_str(buf, SHORTSTR, dl, isreq ? "Requisite: http://" : "Link: http://");

    if (!pu->host) {
	/*  url carries its own authority part  */
	if (pu->auth) {
	    lcat_mem(buf, SHORTSTR, dl, pu->authlen, pu->auth);
	    lcat_chr(buf, SHORTSTR, dl, '@');
	}
	lcat_mem(buf, SHORTSTR, dl, hnl, hostname);
    } else {
	lcat_str(buf, SHORTSTR, dl, pu->host->name);
    }

    if (pu->port != 80) {
	lcat_mem(buf, SHORTSTR, dl, sprintf(abuf, ":%d", pu->port), abuf);
    }
    lcat_chr(buf, SHORTSTR, dl, '/');
    lcat_mem(buf, SHORTSTR, dl, pu->lpartlen, pu->lpart);
    lcat_chr(buf, SHORTSTR, dl, '\n');

    if (dl > SHORTSTR)
	return;

    /*  enlarge the header buffer if the line does not fit  */
    if (au->hdrssiz < au->hdrslen + dl) {
	want = au->hdrslen * 2 + dl;
	grown = mrealloc(au->headers, want);
	if (!grown)
	    return;
	au->headers = grown;
	au->hdrssiz = want;
    }

    memcpy(au->headers + au->hdrslen, buf, dl);
    au->hdrslen += dl;
}

/*
 *  Parse the complete url string, run all acceptance checks and register
 *  the url for fetching.
 *
 *  srct        description of where the url came from; used in messages
 *  url, len    url text
 *  base        url that relative references are resolved against, or 0
 *  referer     url the reference was found in, or 0 if user-supplied
 *  parm        url parameters; if 0, referer->parm is used (referer must
 *              not be 0 then)
 *  isreq       nonzero if the url is a requisite of the referer
 *  relocs      stored in the new url
 *  link_depth  recursion depth, stored in the new url
 *  au          if not 0, a Link/Requisite line is appended to its headers
 *
 *  Returns 1 if the url was added (either to the url table and the
 *  connect queue, or to the list of a host that is still being looked
 *  up), 0 if it was rejected or an error occurred.
 *
 *  Note that parm->ref_count is incremented up front and is not taken
 *  back on the early rejection paths.
 */
int 
parse_add_url(const char *srct, const char *url, int len, url_t *base,
	      url_t *referer, url_parm_t *parm,
	      int isreq, int relocs, int link_depth,
	      aurl_t *au)
{
    url_t *u;
    host_t *host;
    whost_t *wh;
    int hash = 0, hnl, saveit;
    purl_t pu[1];
    char hostbuf[SHORTSTR], lpbuf[SHORTSTR];

    if (max_urls && num_urls >= max_urls) {
	static int exce;	/* warn only once */
	if (!exce) {
	    prx(WRN, "URL count quota exceeded\n");
	    exce = 1;
	}
	return 0;
    }

    if (!parm)
	parm = referer->parm;
    parm->ref_count++; /* XXX should be done later */

    if (parse_url(srct, url, len, base,
		  (referer != 0) <= parm->opt->inhibit_cgiget,
		  pu, lpbuf, hostbuf, &hnl) != PU_OK)
	return 0;

    if (au) {
	/*  the link is always recorded in the headers, even if it is
	    not followed  */
	print_purl(au, pu, hostbuf, hnl, isreq);
	if ((au->url->parm->opt->max_recurse &&
	     au->url->link_depth >= au->url->parm->opt->max_recurse) ||
	    ((isreq ? au->url->parm->opt->follow_src :
		      au->url->parm->opt->follow_href) == NOT_RECURSIVE))
	    return 0;
    }

    if (pu->host)
	host = pu->host;
    else {
	if (!(host = host_lookup_fast(hostbuf, hnl)) || !host->ready) {
	    /*  host unknown or lookup still in progress  */
	    if (referer && economize_dns) {
		dbg(URL, ("not adding '%.*s' (non-cached hostname)\n", len, url));
		return 0;
	    }
	} else {
	    if (!host->info) {
		/*  prx(ERR, "non-existent host in '%.*s'\n", len, url);  */
		num_urls++;
		num_urls_fail++;
		write_psts(parm, url, len, !referer, 451);
		return 0;
	    }
	    if (!same_host(referer, host, pu->port, isreq)) {
		dbg(URL, ("not adding '%.*s' (different host)\n", len, url));
		return 0;
	    }
	}
    }

    if (!(saveit = test_pat(pu->lpart, pu->lpartlen, pu->pathlen, 0, 0, parm))) {
	/*  a rejected url is still fetched (without saving it) if
	    recursion may lead on from it  */
	if (parm->opt->follows_max == NOT_RECURSIVE ||
	    (parm->opt->max_recurse && link_depth >= parm->opt->max_recurse))
	{
	    prx(NFO, "not adding %.*s (rejected)\n", len, url);
	    return 0;
	}
    } else
	saveit = !(parm->disposition && parm->disposition->devnull);

    /*  hash is set here exactly when add_url() is called below  */
    if (host && host->ready &&
	find_url(pu->lpart, pu->lpartlen, host->info, pu->port, &hash))
	return 0;

    if (referer && !same_dir(pu->lpart, pu->lpartlen, referer, isreq))
	return 0;

    if (!(u = mmalloc(sizeof(*u) + (pu->lpartlen + 1))))
	return 0;

    memcpy(u->local_part, pu->lpart, pu->lpartlen);
    u->local_part[pu->lpartlen] = '\0';

    u->referer = referer;
    u->parm = parm;
    u->port = pu->port;
    u->path_len = pu->pathlen;
    u->is_requisite = isreq;
    u->save_content = saveit;
    u->relocs = relocs;
    u->link_depth = link_depth;
    if (referer)
	u->disp_pathoff = referer->disp_pathoff;
    else {
	/* needed for disposition and for same_dir with follow_* <= -r/-pr */
	int dp = pu->pathlen;
	while (--dp > 0 && pu->lpart[dp - 1] != '/');
	u->disp_pathoff = dp;
    }

    /*  the parameters may be shared with other urls, so they must be
	detached before the authentication is changed. if detaching
	fails, the url is dropped rather than modifying the shared copy  */
    if (!pu->host && pu->auth) {
	if (!detach_parm(u) ||
	    !(u->parm->http_auth = mmalloc(len_enc_auth(pu->authlen)))) {
	    free_url(u);
	    return 0;
	}
	encode_auth(u->parm->http_auth, pu->auth, pu->authlen);
    } else if (referer && parm->http_auth &&
	       host && host->ready && host->info != referer->host->info)
    {
	/*  do not pass the referer's authentication to another host  */
	if (!detach_parm(u)) {
	    free_url(u);
	    return 0;
	}
	u->parm->http_auth = 0;
    }

    if (host) {
	u->host = host;
	if (host->ready)
	    add_url(u, hash);
	else {
	    /*  lookup in progress: park the url at the waiting host  */
	    wh = (whost_t *)host->info;
	    cq_append(wh->urls, u);
	}
    } else {
	if (!(wh = host_lookup_full(hostbuf, hnl))) {
	    /*  NOTE(review): unlike the failure path above this frees
		only u and leaves the reference count untouched - confirm
		whether free_url() is wanted here  */
	    free(u);
	    return 0;
	}
	u->host = wh->host;
	cq_append(wh->urls, u);
    }

    return 1;
}

/*
 *  Process everything that was parked at the waiting host wh.
 *  NOTE(review): presumably called once the host lookup has finished -
 *  confirm against the caller.
 *
 *  If wh->host->info is set, every parked url is run through the checks
 *  that parse_add_url() could not do without host info (same_host(),
 *  duplicate elimination, dropping of foreign authentication) and then
 *  added. Otherwise every url is counted as failed with status 451.
 *  Every url that is not added is freed.
 */
void
finish_whost(whost_t *wh)
{
    waiting_proxies -= wh->num_proxies;
    cq_consume(wh->urls, url_t, u, {
	cq_rm1st(wh->urls);
	if (wh->host->info) {
	    if (!same_host(u->referer, u->host, u->port, u->is_requisite)) {
		prxu(NFO, u, "not adding '$u' (different host)\n");
		free_url(u);
	    } else {
		int hash;
		if (find_url(u->local_part, strlen(u->local_part),
			     u->host->info, u->port, &hash))
		    free_url(u);
		else {
		    /*  authentication inherited from a referer on another
			host must not be passed on  */
		    int strip = u->referer && u->parm->http_auth &&
			u->referer->parm->http_auth == u->parm->http_auth &&
			u->referer->host->info != u->host->info;

		    /*  the parameters are shared; if they cannot be
			detached, drop the url instead of clearing the
			authentication of the shared copy  */
		    if (strip && !detach_parm(u))
			free_url(u);
		    else {
			if (strip)
			    u->parm->http_auth = 0;
			add_url(u, hash);
		    }
		}
	    }
	} else {
	    num_urls++;
	    num_urls_fail++;
	    write_usts(u, 451);
	    free_url(u);
	}
    });
    wh->host = 0;
}

/*
 *  Append url u to the queue of urls waiting for a connection.
 *
 *  Returns 1 on success, 0 if the queue entry cannot be allocated.
 */
int 
queue_url(url_t *u)
{
    wurl_t *wu = mmalloc(sizeof(*wu));

    if (wu) {
	wu->url = u;
	cq_append(queue_urls_connect, wu);
	return 1;
    }
    return 0;
}

/*
 *  Enter url u into the url table.
 *
 *  u     url to add; u->host->info must be valid
 *  hash  hash code of the url as calculated by find_url()
 *
 *  Returns 1 on success, 0 on failure. The url is not freed here.
 */
static int
do_add_url(url_t *u, int hash)
{
    u->attempt = 0;

    /*  the time stamp is not inherited by referenced urls. the
	parameters may be shared, so they are detached first; if that
	fails, report failure rather than clearing the shared copy  */
    if (u->parm->time_stamp && u->referer) {
	if (!detach_parm(u))
	    return 0;
	u->parm->time_stamp = 0;
    }

/*  hash of a table entry; built from the same parts as in find_url()  */
#define uhash(up) calc_hash(up->local_part) ^ (int)up->host->info ^ up->port

    /*  "return 0;" is the statement h_add is given for its failure case  */
    h_add(urllist, real_num_urls, url_t, u, hash, return 0;, uhash);
    num_urls++;

    dbgu(URL, (u, "added $u\n"));
    return 1;
}

/*
 *  Add a url to the url chain and enqueue it for processing.
 *
 *  u     url to add
 *  hash  hash code of the url as calculated by find_url()
 *
 *  If the url cannot be entered into the chain, it is released with
 *  free(). The result of queue_url() is not checked.
 */
void 
add_url(url_t *u, int hash)
{
    if (!do_add_url(u, hash)) {
	free(u);
	return;
    }
    queue_url(u);
}

/*
 *  Release url u and drop its reference to the url parameters.
 *
 *  The parameter block is freed when its reference count falls below 1.
 *  Its disposition and http_auth members are deliberately not freed:
 *  doing so led to double frees, so they are leaked instead (XXX).
 */
void 
free_url(url_t *u)
{
    url_parm_t *parm = u->parm;

    parm->ref_count--;
    if (parm->ref_count < 1)
	free(parm);

    free(u);
}

/*
 *  Make sure that url u owns its parameter block exclusively, so that it
 *  can be modified without affecting other urls.
 *
 *  If the block has more than one reference, u gets a private byte-wise
 *  copy with a reference count of 1 and the count of the original is
 *  decremented. Otherwise nothing is changed.
 *
 *  Returns 1 on success, 0 if the copy cannot be allocated (u is left
 *  untouched then).
 */
int 
detach_parm(url_t *u)
{
    url_parm_t *shared = u->parm;
    url_parm_t *own;

    /*  sole owner already  */
    if (shared->ref_count <= 1)
	return 1;

    own = mmalloc(sizeof(*own));
    if (!own)
	return 0;

    memcpy(own, shared, sizeof(*own));
    own->ref_count = 1;
    shared->ref_count--;
    u->parm = own;
    return 1;
}
