# KEHOME/bin/html2url
# rhm Oct/15/2002

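# usage: html2url file.html ...
# For each input file, get the href URLs from the HTML and write them to
# a file named after the input, with a trailing .html/.htm replaced by .url:
#   - each URL is wrapped in double quotes
#   - the link label (ulabel) is kept as the second field
#   - duplicate lines are deleted (the output is sorted)
# output format, one link per line:
#    "url"; ulabel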

# html2url_filter
#   Reads text on stdin, keeps only the lines containing href=" (any
#   letter case), and writes sorted, de-duplicated lines of the form
#       "url"; ulabel
#   to stdout.  Input is expected to hold one HTML tag per line with the
#   tag's text following on the same line (what the htmlone stage is
#   described as producing -- htmlone itself is an external tool).
html2url_filter() {
    # 1. delete leading junk through href=" -- matched in any letter case,
    #    so the rest of the line (URL, label) is never case-folded
    # 2. delete trailing blanks
    # 3. replace the URL's closing quote, plus any further attributes up
    #    to the end of the tag, with the `"; ` field separator
    # 4. insert the leading quote mark
    grep -i 'href="' |
    sed -e 's/^.*[Hh][Rr][Ee][Ff]="//' \
        -e 's/ *$//' \
        -e 's/^\([^"]*\)"[^>]*>/\1"; /' \
        -e 's/^/"/' |
    sort -u
}

# html2url_file FILE
#   Converts one HTML file: FILE with a trailing .html/.htm removed and
#   .url appended names the output file.  An unreadable FILE (or a
#   directory) is reported on stderr and skipped with status 1, leaving
#   any existing .url file untouched.
#   Sets the script-level variables base and fout (no `local`, to stay
#   within plain POSIX sh).
html2url_file() {
    if [ ! -r "$1" ] || [ -d "$1" ]; then
        printf 'html2url: cannot read %s\n' "$1" >&2
        return 1
    fi

    base="${1%.html}"
    base="${base%.htm}"
    fout="${base}.url"

    # htmlone reads the page on stdin: one HTML command per line
    htmlone <"$1" | html2url_filter >"$fout"
}

html2url_rc=0
for fin in "$@"; do
    html2url_file "$fin" || html2url_rc=1
done
# Leave a non-zero status behind when any input had to be skipped.
[ "$html2url_rc" -eq 0 ]
