/var/lib/sorcery/modules/liburl

#!/bin/bash
#---------------------------------------------------------------------
##
## @Synopsis Functions that download and verify urls.
##
##
## This file contains functions for downloading and verifying urls.
## It does this by extracting information from a url with url handlers
## specific to that url type. The url handler also determines a download
## handler to actually download the url.
##
## FIXME
## For example, the request to download
## the following url is made through the generic F<url_download>
## function:
##
##     http://machinename.com/path/to/file.tar.bz2
##
## The F<url_download> function parses the url prefix (in this
## case, http) and passes the url to the http download handler
## (F<url_http_download>).  A similar approach is used for url
## verification.
##
## This file provides an infrastructure that makes it relatively easy
## to add new url handlers.  In order to add a new handler, all that
## has to be done is to add a new file to the sorcery library directory
## with the new url handler functions defined in it.  This new
## file will automatically be discovered and used by the sorcery
## scripts.
##
## The following section describes how to add new url handlers in
## a little more detail.
##
## <p>WRITING NEW URL HANDLERS</p>
##
## This section describes the steps needed to write new url handlers.
##
## <p>Decide on the Url Format</p>
##
## Urls must be of the form <prefix>://<address>.  The prefix should
## be something unique.  Only the prefix is used by this script; the
## address is not parsed or used here, it is simply passed to the
## appropriate url handler.
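##
## For example, a hypothetical I<xyz> handler might accept urls of
## the form:
## <pre>
##     xyz://server.example.com/path/to/file.tar.bz2
## </pre>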
##
## <p>Create a File to Hold the New Url Handling Functions</p>
##
## In the url_handlers subdirectory of the sorcery module library
## (i.e., the directory pointed to by the SGL_LIBRARY_MODULES
## variable), create a new file called url_<prefix>.  For example, if
## your new url prefix is I<xyz>, you should create a new file called
## F<url_xyz>.  The file should be executable.
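##
## A rough sketch of the steps, assuming the default layout used by
## the loader at the top of this file:
## <pre>
##     touch $SGL_LIBRARY_MODULES/url_handlers/url_xyz
##     chmod +x $SGL_LIBRARY_MODULES/url_handlers/url_xyz
## </pre>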
##
## <p>Implement Url Handlers</p>
##
## The next step is to write the actual functions that will handle
## url requests and put them in the new file you just created.  The
## url handler functions are:
## <pre>
##    url_<URL_PREFIX>_bucketize <url>
##    url_<URL_PREFIX>_crack <url>
##    url_<URL_PREFIX>_expand <url>
##    url_<URL_PREFIX>_hostname <url>
##    url_<URL_PREFIX>_is_valid <url>
##    url_<URL_PREFIX>_netselect <url>
##    url_<URL_PREFIX>_verify <url>
## </pre>
## A handler only has to override the functions for which the
## url_default_* versions (dispatched via url_generic_apifunc below)
## are not sufficient.  The easiest way to figure out what to do is to
## look at one of the existing files (e.g., url_http handles http
## requests).
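##
## A minimal sketch of such a file for a hypothetical I<xyz> prefix
## (the download handler name "wget" is illustrative; use whatever
## download handler actually fits the url type):
## <pre>
##    #!/bin/bash
##    function url_xyz_is_valid() {
##      # valid if the prefix matches and an address follows it
##      local URL=$1
##      [[ $(url_get_prefix "$URL") == xyz ]] &&
##      [[ -n $(url_strip_prefix "$URL" xyz) ]]
##    }
##
##    function url_xyz_bucketize() {
##      # name of the download handler that should fetch this url
##      echo wget
##    }
##
##    function url_xyz_hostname() {
##      url_strip_prefix "$1" xyz | cut -d/ -f1
##    }
##    # crack, expand, netselect and verify fall back to the
##    # url_default_* implementations unless defined here
## </pre>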
##
## <p>Handling Multiple Url Types in a Single File</p>
##
## It's perfectly valid for a file to handle multiple types of urls.
## The F<url_http> file actually handles ftp, http, and https urls.
## Take a look at the file to see how it's done.
##
##
## @Copyright Copyright 2002 by the Source Mage Team
##
#---------------------------------------------------------------------

# Load library files (url_*) that contain url handlers
#
# (2002/09/29) added if so it only loads the stuff once
if ! [[ $URL_HANDLER_FILES ]] ; then
  URL_HANDLER_FILES=`ls $SGL_LIBRARY_MODULES/url_handlers/url_*[^~]`
  for  url_handler_file  in  $URL_HANDLER_FILES;  do
    [  -x $url_handler_file  ]                               &&
    URL_PREFIX=`echo $url_handler_file | sed "s/.*\/url_//"` &&
    URL_TYPES[${#URL_TYPES[@]}]=$URL_PREFIX                  &&
    . $url_handler_file
  done
fi

#---------------------------------------------------------------------
## @param see url_download
##
## This is simply a wrapper around url_download which provides the
## additional functionality of expanding and ranking urls.
## Processes like summon should use this; more specific processes
## like scribe and sorcery update will probably call url_download
## directly.
##
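## A rough usage sketch (the variable names are illustrative; the
## last two arguments name the variables that the download handler
## fills in):
## <pre>
##    local dl_target dl_type
##    if url_download_expand_sort "$target" "$url_list" "$hints" \
##                                dl_target dl_type; then
##      message "downloaded $dl_target ($dl_type)"
##    fi
## </pre>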
#---------------------------------------------------------------------
function url_download_expand_sort() {
  $STD_DEBUG
  local target=$1
  local url_list=$2
  local hints=$3
  local udl_target=$4
  local udl_type=$5

  local url expanded_urls sorted_urls

  debug "liburl" "$FUNCNAME -- $@"
  # you can't expect me to download something without urls...
  [ -z "$url_list" ] && return 255

  # ensure the list is newline separated
  url_list="$(echo $url_list|tr ' ' '\n')"

  # expand urls
  url_expand_urls expanded_urls "$url_list"
  if ! [[ "$expanded_urls" ]] ; then
    message "No expanded urls! If you get this it is a sorcery bug"
    return 1
  fi
  # limit the number of expanded urls, only the linux kernel reaches this
  # limit and doing netselect on that many urls is absurd
  expanded_urls=$(echo "$expanded_urls"|head -n 50)
  debug "liburl" "expanded urls $expanded_urls"
  # sort urls
  url_sort_urls sorted_urls "$expanded_urls"
  if ! [[ "$sorted_urls" ]] ; then
    message "No sorted urls! If you get this it is a sorcery bug"
    return 1
  fi
  # shorten the list to a manageable amount
  sorted_urls=$(echo "$sorted_urls"|head -n 10)
  debug "liburl" "sorted urls $sorted_urls"
  url_download "$target" "$sorted_urls" "$hints" "$udl_target" "$udl_type"
}

#---------------------------------------------------------------------
## @param target        Expected target file or tree; this is only a suggestion, and the download handler may ignore it.
## @param url_list      List of urls to get the target from
## @param hints         Hints; these help the function determine what type of thing it is downloading or other tunables.
## @param udl_target    Name of the variable in which to store the name of the result directory or file
## @param udl_type      Name of the variable in which to store the type of thing downloaded
##
## @return 0 if the target could be downloaded
## @return 1 otherwise (255 if no urls were given)
## Downloads the target from one of the specified urls.  Returns true
## if it could be downloaded, false otherwise.
##
##
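## A hedged example of the variable-name indirection used by the last
## two parameters (the local names are illustrative):
## <pre>
##    local got_target got_type
##    url_download "$target" "$url_list" "$hints" got_target got_type &&
##    message "downloaded $got_target ($got_type)"
## </pre>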
#---------------------------------------------------------------------
function url_download() {
  $STD_DEBUG
  local target=$1
  local url_list=$2
  local hints=$3
  local udl_target=$4
  local udl_type=$5

  local url

  debug "liburl" "$FUNCNAME -- $@"

  # you can't expect me to download something without urls...
  [ -z "$url_list" ] && return 255

  # sanity check urls
  local valid_urls
  url_get_valid_urls valid_urls "$url_list"
  if ! [[ "$valid_urls" ]] ; then
    message "No valid urls!"
    return 1
  fi

  hash_reset dl_buckets
  local bucket buckets rc
  # bucketize
  for url in $valid_urls; do
    bucket=$(url_bucketize "$url")
    if [[ $bucket ]] ; then
      hash_append dl_buckets "$bucket" "$url"
    else
      message "Failed to find a download handler for $url, this is likely to be a sorcery bug"
    fi
  done

  # sort buckets, someday a better algorithm may be used, however
  # this should be sufficient for now.
  buckets=$(hash_get_table_fields dl_buckets|sort)

  # iterate through buckets
  for bucket in $buckets; do
    dl_get_bucket "$bucket" "$target" "$(hash_get dl_buckets $bucket)" \
                  "$hints" "$udl_target" "$udl_type"
    rc=$?
    if [[ $rc == 0 ]] ; then
      debug "liburl" "url_download -- downloaded $url $udl_r_target $udl_r_type"
      return 0
    fi
  done
  return 1
}


#---------------------------------------------------------------------
## @param variable name to store the filtered list in
## @param urllist
## Filters the url list, keeping only urls that url_is_valid accepts.
#---------------------------------------------------------------------
function url_get_valid_urls() {
  $STD_DEBUG
  local upvar="$1"
  local url_list="$2"
  local tmp_list=$(
    for url in $url_list; do
      if url_is_valid "$url"; then
        echo "$url"
      fi
    done
  )
  eval "$upvar=\"\$tmp_list\""
}


#---------------------------------------------------------------------
## @param variable name to store the expanded list in
## @param urllist
##
## Frontend to expand urls into a nice list.
#---------------------------------------------------------------------
function url_expand_urls() {
  $STD_DEBUG
  local upvar="$1"
  local url_list="$2"
  local tmp_list
  # put the original urls in the front of the list
  # awkuniq will remove duplicates thereafter
  tmp_list="$({ echo "$url_list" ; for url in $url_list; do
    # this should expand to something or return itself
    url_expand "$url"
  done ; } |awkuniq)"
  eval "$upvar=\"\$tmp_list\""
}

#---------------------------------------------------------------------
## @param variable name to store the sorted list in
## @param urllist
## Ranks the urls with url_rank when netselect is enabled and installed.
#---------------------------------------------------------------------
function url_sort_urls() {
  $STD_DEBUG
  local upvar="$1"
  local url_list="$2"
  if [[ $NET_SELECT == on ]]  &&  spell_installed "netselect" ;  then
    lock_resources "network" "network"
    message -n "${CHECK_COLOR}Looking for the fastest mirror site...${DEFAULT_COLOR} "
    dl_connect
    url_list="$(url_rank $url_list 2> /dev/null)"
    dl_disconnect
    unlock_resources "network" "network"
    message " done."
  fi
  eval "$upvar=\"\$url_list\""
}

#---------------------------------------------------------------------
## @param urllist
## @Stdout new list
## Ranks the urls in order of speed from fastest to slowest using
## netselect.  Makes use of the url_<url_type>_netselect functions.
##
## If multiple urls from the same hostname are passed in, their ordering
## is preserved, although that group of urls may move as a whole in
## the list.
#---------------------------------------------------------------------
function url_rank() {

  local urlList sortedList urlCounter
  local finalList url_speed
  local tmp_url tmp_prefix tmp_hostname tmp_list

  debug "liburl" "unsorted list: $*"

  urlList="$@"

  # Even if there's only one url it might have multiple A records, and
  # we'll want netselect to find the fastest one

  # The take home message is that a url can be /any/ random string of text,
  # and a url is officially defined by its handler.

  # So we are having the handler give us the netselected output;
  # from there we are sticking things into an array and letting
  # bash handle the sorting for us...
  function url_rank_divide_by_prefix() {
    local tmp_url=$1 tmp_prefix
    tmp_prefix=$(url_get_prefix $tmp_url)                 &&
    hash_append url_div_hash $tmp_prefix $tmp_url
  }
  iterate url_rank_divide_by_prefix " $IFS" $urlList

  for tmp_prefix in $(hash_get_table_fields url_div_hash) ; do
    hash_get_ref url_div_hash $tmp_prefix tmp_list
    debug liburl "urls are $tmp_list"
    # the awk command turns netselect output like this:
    # 69 url1
    # 234 url2
    # into the following bash code, which is then executed:
    # url_speed[69]="${url_speed[69]} url1"
    # url_speed[234]="${url_speed[234]} url2"
    eval $(url_netselect $tmp_list|
      awk '{ if ($1 > 0 && $1 < 65536 ) {
               printf "url_speed[%s]=\"${url_speed[%s]} %s\";",$1,$1,$2
             }
           }')
  done

  # since we put things in url_speed indexed by speed, the * should
  # expand back in sorted order

  sortedList=$(echo ${url_speed[@]})
  debug "liburl" "Ordered list pre-sanity check is $sortedList"

  # if all sites are ICMP Unreachable, return the unsorted list instead of null.
  if [[ -z "$sortedList" ]] ; then
    debug "liburl" "Failed to expand list"
    echo "$urlList"
    return
  fi

  # try really hard not to lose urls by appending missing ones to the end
  # of the list; please tell me if there's a faster way to do this
  function url_rank_iterator2() {
    real_list_find "$sortedList" "$1" || sortedList="$sortedList $1"
  }
  iterate url_rank_iterator2 " $IFS" $urlList

  # just in case something failed along the way just return what we had
  if [[ -z "$sortedList" ]] ; then
    debug "liburl" "Ordering failed somewhere, giving back original input"
    echo "$urlList"
  else
    debug "liburl" "Ordered URLs: $sortedList"
    echo "$sortedList"
  fi
}

#---------------------------------------------------------------------
## @Type Private
## @param url
## @Stdout url prefix
## @return 0 valid url
## @return 1 otherwise
## Takes a url and echoes the url prefix.  Returns
## true if a valid url could be found, returns false otherwise.
##
## This is the only place parsing of a url should take place outside of
## a url_handler.  Doing so elsewhere is bugworthy.
##
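## For example (output shown as a comment):
## <pre>
##    url_get_prefix "http://machinename.com/path/to/file.tar.bz2"
##    # echoes: http
## </pre>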
#---------------------------------------------------------------------
function url_get_prefix()  {
  $STD_DEBUG
  local URL=$1
  local  URL_PREFIX=${URL/:\/\/*}
  [  -n  "$URL_PREFIX"  ]  &&
  [  "$URL_PREFIX" != "$URL" ]  &&
  echo    $URL_PREFIX
}


#---------------------------------------------------------------------
## @Type Private
## @param url
## @param url prefix
## @stdout url sans prefix
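##
## For example (output shown as a comment):
## <pre>
##    url_strip_prefix "http://machinename.com/file.tar.bz2" http
##    # echoes: machinename.com/file.tar.bz2
## </pre>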
#---------------------------------------------------------------------
function url_strip_prefix() {
  $STD_DEBUG
  echo $1 | sed "s!^$2://!!"
}

#---------------------------------------------------------------------
## url handler api functions.  Use these to access url specific
## functionality/data.
#---------------------------------------------------------------------

#---------------------------------------------------------------------
## @param url
## @stdout dl handler
##
## Get the download handler for this url
#---------------------------------------------------------------------
function url_bucketize()  {
  $STD_DEBUG
  url_generic_apifunc bucketize $1
}

#---------------------------------------------------------------------
## @param url
##
## Parse the url somehow.  The results are url specific right now;
## this is usually only called by dl handlers that know what url types
## they can handle and thus understand the return value.
#---------------------------------------------------------------------
function url_crack()  {
  $STD_DEBUG
  url_generic_apifunc crack "$@"
}

#---------------------------------------------------------------------
## @param url(s)
## @stdout urls
##
## Attempt to find more urls similar to the given one, based on the
## sorcery mirrors files.  Most url types simply expand to the input.
#---------------------------------------------------------------------
function url_expand()  {
  $STD_DEBUG
  url_generic_apifunc expand "$@"
}

#---------------------------------------------------------------------
## @param url
##
## Verify the url; this usually means going out to the internet and
## somehow determining if the link is good.
#---------------------------------------------------------------------
function url_verify()  {
  $STD_DEBUG
  url_generic_apifunc verify "$@"
}

#---------------------------------------------------------------------
## @param url
## @Stdout url hostname
## @return 0 valid url
## @return 1 otherwise
## Takes a url and echoes the url hostname.  Returns
## true if a hostname could be found, returns false otherwise.
##
#---------------------------------------------------------------------
function url_hostname()  {
  $STD_DEBUG
  url_generic_apifunc hostname $1
}

#---------------------------------------------------------------------
## @param url
## @Stdout url netselect output
## @return 0 valid url
## @return 1 otherwise
## Prints the netselect output from the url handler's attempt at
## running netselect.
##
#---------------------------------------------------------------------
function url_netselect()  {
  $STD_DEBUG
  url_generic_apifunc netselect "$@"
}

#---------------------------------------------------------------------
## @param url
## @return 0 valid url
## @return 1 otherwise
## Returns true if the given url is a valid url understood by the url
## library, returns false otherwise.
##
#---------------------------------------------------------------------
function url_is_valid()  {
  $STD_DEBUG
  url_generic_apifunc is_valid $1
}


#---------------------------------------------------------------------
## @param function name
## @param url(s)
##
## This implements the common code for simple url handler inheritance.
## The above url api functions call this, which then calls the handler
## specific function, if it exists, or the default version.  This
## allows url handlers to only override functions as necessary.
##
## If multiple urls are given, the prefix of all of them is assumed
## to be the same.
##
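## For illustration, a call such as the following (the xyz prefix is
## hypothetical):
## <pre>
##    url_verify "xyz://server.example.com/file.tar.bz2"
## </pre>
## runs url_xyz_verify if a handler defines it, and url_default_verify
## otherwise.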
#---------------------------------------------------------------------
function url_generic_apifunc() {
  local tmp_func=$1
  shift
  local tmp_url=$1 tmp_prefix
  tmp_prefix=$(url_get_prefix $1) &&
  if misc_is_function url_${tmp_prefix}_${tmp_func}; then
    url_${tmp_prefix}_${tmp_func} "$@"
  else
    url_default_${tmp_func} "$@"
  fi
}

#---------------------------------------------------------------------
## This software is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This software is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this software; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#---------------------------------------------------------------------