/var/lib/sorcery/modules/liburl
#!/bin/bash
#---------------------------------------------------------------------
##
## @Synopsis Functions that download and verify urls.
##
##
## This file contains functions for downloading and verifying urls.
## It does this by extracting information from a url with url handlers
## specific to that url type. The url handler also determines a download
## handler to actually download the url.
##
## FIXME
## For example, the request to download
## the following url is made through the generic F<url_download>
## function:
##
## http://machinename.com/path/to/file.tar.bz2
##
## The F<url_download> function parses the url prefix (in this
## case, http) and passes the url to the http download handler
## (F<url_http_download>). A similar approach is used for url
## verification.
##
## This file provides an infrastructure that makes it relatively easy
## to add new url handlers. In order to add new handlers, all that
## has to be done is add a new file to the sorcery library directory
## with the new url handler functions defined in the file. This new
## file will automatically be discovered and used by the sorcery
## scripts.
##
## The following section describes how to add new url handlers in
## a little more detail.
##
## <p>WRITING NEW URL HANDLERS</p>
##
## This section describes the steps needed to write new url handlers.
##
## <p>Decide on the Url Format</p>
##
## Urls must be of the form <prefix>://<address>. The prefix should
## be something unique. Only the prefix is used by this script;
## the address is not parsed or used, simply passed to the appropriate
## url handler.
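##
## For example, a handler registered for a hypothetical I<xyz> prefix
## would receive urls such as:
## <pre>
## xyz://some.host/path/to/file.tar.bz2
## </pre>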
##
## <p>Create a File to Hold the New Url Handling Functions</p>
##
## In the SGL library directory (i.e., the directory pointed to by the
## SGL_LIBRARY variable), create a new file called url_<prefix>. For
## example, if your new url prefix is I<xyz>, you should create a new
## file called F<url_xyz>. The file should be executable.
##
## <p>Implement Url Handlers</p>
##
## The next step is to write the actual functions that
## will handle url requests and put them in the new file you just
## created. The functions that must be implemented are:
## <pre>
## url_<URL_PREFIX>_bucketize <url>
## url_<URL_PREFIX>_crack <url>
## url_<URL_PREFIX>_expand <url>
## url_<URL_PREFIX>_hostname <url>
## url_<URL_PREFIX>_is_valid <url>
## url_<URL_PREFIX>_netselect <url>
## url_<URL_PREFIX>_verify <url>
## </pre>
## The easiest way to figure out what to do is to look at
## one of the existing files (e.g., url_http handles http requests).
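##
## As an illustrative sketch only (the I<xyz> prefix and the bucket
## name below are made up; see F<url_http> for real values), a minimal
## F<url_xyz> might contain:
## <pre>
## function url_xyz_is_valid() {
##   # valid if the prefix is xyz and an address follows it
##   [[ $(url_get_prefix "$1") == xyz ]] &&
##   [[ -n $(url_strip_prefix "$1" xyz) ]]
## }
##
## function url_xyz_bucketize() {
##   # echo the name of the download handler bucket for xyz urls
##   echo some_dl_handler
## }
## </pre>
## Any handler function that is not defined falls back to its
## url_default_* implementation via F<url_generic_apifunc>.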
##
## <p>Handling Multiple Url Types in a Single File</p>
##
## It's perfectly valid for a file to handle multiple types of urls.
## The F<url_http> file actually handles ftp, http, and https urls.
## Take a look at the file to see how it's done.
##
##
## @Copyright Copyright 2002 by the Source Mage Team
##
#---------------------------------------------------------------------

# Load library files (url_*) that contain url handlers
#
# (2002/09/29) added if so it only loads the stuff once
if ! [[ $URL_HANDLER_FILES ]] ; then
  URL_HANDLER_FILES=$(ls "$SGL_LIBRARY_MODULES"/url_handlers/url_*[^~])
  for url_handler_file in $URL_HANDLER_FILES; do
    [ -x "$url_handler_file" ] &&
    URL_PREFIX=${url_handler_file##*/url_} &&
    URL_TYPES[${#URL_TYPES[@]}]=$URL_PREFIX &&
    . "$url_handler_file"
  done
fi

#---------------------------------------------------------------------
## @param see url_download
##
## This is simply a wrapper around url_download which provides the
## additional functionality of expanding and ranking urls.
## Processes like summon should use this; more specialized processes,
## like scribe and sorcery update, will probably call url_download
## directly.
##
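## A hypothetical call (the file name and urls are illustrative):
## <pre>
## local dl_target dl_type
## url_download_expand_sort "foo-1.0.tar.bz2" "$SOURCE_URL" "" \
##                          dl_target dl_type
## </pre>
##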
#---------------------------------------------------------------------
function url_download_expand_sort() {
  $STD_DEBUG
  local target=$1
  local url_list=$2
  local hints=$3
  local udl_target=$4
  local udl_type=$5

  local url expanded_urls sorted_urls

  debug "liburl" "$FUNCNAME -- $@"
  # you can't expect me to download something without urls...
  [ -z "$url_list" ] && return 255

  # ensure the list is newline separated
  url_list="$(echo $url_list|tr ' ' '\n')"

  # expand urls
  url_expand_urls expanded_urls "$url_list"
  if ! [[ "$expanded_urls" ]] ; then
    message "No expanded urls! If you get this it is a sorcery bug"
    return 1
  fi
  # limit the number of expanded urls; only the linux kernel reaches this
  # limit, and doing netselect on that many urls is absurd
  expanded_urls=$(echo "$expanded_urls"|head -n 50)
  debug "liburl" "expanded urls $expanded_urls"
  # sort urls
  url_sort_urls sorted_urls "$expanded_urls"
  if ! [[ "$sorted_urls" ]] ; then
    message "No sorted urls! If you get this it is a sorcery bug"
    return 1
  fi
  # shorten the list to a manageable amount
  sorted_urls=$(echo "$sorted_urls"|head -n 10)
  debug "liburl" "sorted urls $sorted_urls"
  url_download "$target" "$sorted_urls" "$hints" "$udl_target" "$udl_type"
}

#---------------------------------------------------------------------
## @param target Expected target file or tree; this is only a suggestion, and the download handler may ignore it.
## @param url_list List of urls to get the target from
## @param hints Hints; these help the function determine what type of thing it is downloading or other tunables.
## @param udl_target Name of variable in which to store the name of the result directory or file
## @param udl_type Name of the variable in which to store the type of thing downloaded
##
## @return 0 if the file could be downloaded
## @return 1 otherwise
## Downloads the specified url. Returns true if the file could be
## downloaded, false otherwise.
##
##
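## A hypothetical call (names are illustrative):
## <pre>
## local dl_target dl_type
## if url_download "foo-1.0.tar.bz2" "$urls" "" dl_target dl_type; then
##   message "downloaded $dl_target of type $dl_type"
## fi
## </pre>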
#---------------------------------------------------------------------
function url_download() {
  $STD_DEBUG
  local target=$1
  local url_list=$2
  local hints=$3
  local udl_target=$4
  local udl_type=$5

  local url

  debug "liburl" "$FUNCNAME -- $@"

  # you can't expect me to download something without urls...
  [ -z "$url_list" ] && return 255

  # sanity check urls
  local valid_urls
  url_get_valid_urls valid_urls "$url_list"
  if ! [[ "$valid_urls" ]] ; then
    message "No valid urls!"
    return 1
  fi

  hash_reset dl_buckets
  local bucket buckets rc
  # bucketize
  for url in $valid_urls; do
    bucket=$(url_bucketize "$url")
    if [[ $bucket ]] ; then
      hash_append dl_buckets "$bucket" "$url"
    else
      message "Failed to find a download handler for $url, this is likely to be a sorcery bug"
    fi
  done

  # sort buckets; someday a better algorithm may be used, however
  # this should be sufficient for now.
  buckets=$(hash_get_table_fields dl_buckets|sort)

  # iterate through buckets
  for bucket in $buckets; do
    dl_get_bucket "$bucket" "$target" "$(hash_get dl_buckets $bucket)" \
                  "$hints" "$udl_target" "$udl_type"
    rc=$?
    if [[ $rc == 0 ]] ; then
      debug "liburl" "url_download -- downloaded $url $udl_r_target $udl_r_type"
      return 0
    fi
  done
  return 1
}


#---------------------------------------------------------------------
## @param upvar name of the variable in which to store the valid urls
## @param urllist list of urls to check
##
## Filters the given url list down to the urls that the url library
## understands.
#---------------------------------------------------------------------
function url_get_valid_urls() {
  $STD_DEBUG
  local upvar="$1"
  local url_list="$2"
  local tmp_list=$(
    for url in $url_list; do
      if url_is_valid "$url"; then
        echo "$url"
      fi
    done
  )
  eval "$upvar=\"\$tmp_list\""
}


#---------------------------------------------------------------------
## @param upvar name of the variable in which to store the expanded urls
## @param urllist list of urls to expand
##
## Frontend to expand urls into a nice list.
#---------------------------------------------------------------------
function url_expand_urls() {
  $STD_DEBUG
  local upvar="$1"
  local url_list="$2"
  local tmp_list
  # put the original urls in the front of the list;
  # awkuniq will remove duplicates thereafter
  tmp_list="$({ echo "$url_list" ; for url in $url_list; do
    # this should expand to something or return itself
    url_expand "$url"
  done ; } |awkuniq)"
  eval "$upvar=\"\$tmp_list\""
}

#---------------------------------------------------------------------
## @param upvar name of the variable in which to store the sorted urls
## @param urllist list of urls to sort by download speed
#---------------------------------------------------------------------
function url_sort_urls() {
  $STD_DEBUG
  local upvar="$1"
  local url_list="$2"
  if [[ $NET_SELECT == on ]] && spell_installed "netselect" ; then
    lock_resources "network" "network"
    message -n "${CHECK_COLOR}Looking for the fastest mirror site...${DEFAULT_COLOR} "
    dl_connect
    url_list="$(url_rank $url_list 2> /dev/null)"
    dl_disconnect
    unlock_resources "network" "network"
    message " done."
  fi
  eval "$upvar=\"\$url_list\""
}

#---------------------------------------------------------------------
## @param urllist
## @Stdout new list
## Ranks the urls in order of speed from fastest to slowest using netselect.
## Makes use of url_<url_type>_hostname functions.
##
## If multiple urls from the same hostname are passed in, their ordering
## is preserved, although that group of urls may move as a whole in
## the list.
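##
## For example (hosts and ordering are illustrative):
## <pre>
## url_rank http://slow.example.org/f http://fast.example.org/f
## </pre>
## echoes the fast.example.org url first if netselect measures that
## host as quicker.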
#---------------------------------------------------------------------
function url_rank() {

  local urlList sortedList urlCounter
  local finalList url_speed
  local tmp_url tmp_prefix tmp_hostname tmp_list

  debug "liburl" "unsorted list: $*"

  urlList="$@"

  # Even if there's only one url it might have multiple A records, and
  # we'll want netselect to find the fastest one.

  # The take-home message is that a url can be /any/ random string of
  # text, and a url is officially defined by its handler.

  # So we have the handler give us the netselect output; from there we
  # stick things into an array and let bash handle the sorting for us...
  function url_rank_divide_by_prefix() {
    local tmp_url=$1 tmp_prefix
    tmp_prefix=$(url_get_prefix $tmp_url) &&
    hash_append url_div_hash $tmp_prefix $tmp_url
  }
  iterate url_rank_divide_by_prefix " $IFS" $urlList

  for tmp_prefix in $(hash_get_table_fields url_div_hash) ; do
    hash_get_ref url_div_hash $tmp_prefix tmp_list
    debug liburl "urls are $tmp_list"
    # the awk command turns netselect output like this:
    # 69 url1
    # 234 url2
    # into the following bash code, which is then executed
    # url_speed[69]="${url_speed[69]} url1"
    # url_speed[234]="${url_speed[234]} url2"
    # (the guard on $1 skips entries without a sane netselect score)
    eval $(url_netselect $tmp_list|
    awk '{ if ($1 > 0 && $1 < 65536 ) {
             printf "url_speed[%s]=\"${url_speed[%s]} %s\";",$1,$1,$2
           }
         }')
  done

  # since we put things in url_speed indexed by speed, the expansion
  # below comes back in sorted (fastest first) order

  sortedList=$(echo ${url_speed[@]})
  debug "liburl" "Ordered list pre-sanity check is $sortedList"

  # if all sites are ICMP Unreachable, return the unsorted list instead of null.
  if [[ -z "$sortedList" ]] ; then
    debug "liburl" "netselect failed to rank any urls"
    echo "$urlList"
    return
  fi

  # try really hard not to lose urls by appending missing ones to the end
  # of the list; please tell me if there's a faster way to do this
  function url_rank_iterator2() {
    real_list_find "$sortedList" "$1" || sortedList="$sortedList $1"
  }
  iterate url_rank_iterator2 " $IFS" $urlList

  # just in case something failed along the way, return what we had
  if [[ -z "$sortedList" ]] ; then
    debug "liburl" "Ordering failed somewhere, giving back original input"
    echo "$urlList"
  else
    debug "liburl" "Ordered URLs: $sortedList"
    echo "$sortedList"
  fi
}

#---------------------------------------------------------------------
## @Type Private
## @param url
## @Stdout url prefix
## @return 0 valid url
## @return 1 otherwise
## Takes a url and echoes the url prefix. Returns
## true if a valid url could be found, returns false otherwise.
##
## This is the only place parsing of a url should take place outside of
## a url_handler. Doing so elsewhere is bugworthy.
##
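## For example:
## <pre>
## url_get_prefix http://machinename.com/path   # echoes "http"
## </pre>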
#---------------------------------------------------------------------
function url_get_prefix() {
  $STD_DEBUG
  local URL=$1
  # delete :// and everything after it; if nothing was deleted,
  # the url had no prefix
  local URL_PREFIX=${URL/:\/\/*}
  [ -n "$URL_PREFIX" ] &&
  [ "$URL_PREFIX" != "$URL" ] &&
  echo $URL_PREFIX
}


#---------------------------------------------------------------------
## @Type Private
## @param url
## @param url prefix
## @stdout url sans prefix
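##
## For example:
## <pre>
## url_strip_prefix http://machinename.com/path http
## # echoes "machinename.com/path"
## </pre>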
#---------------------------------------------------------------------
function url_strip_prefix() {
  $STD_DEBUG
  echo $1 | sed "s!^$2://!!"
}

#---------------------------------------------------------------------
## url handler api functions. Use these to access url-specific
## functionality/data.
#---------------------------------------------------------------------

#---------------------------------------------------------------------
## @param url
## @stdout dl handler
##
## Get the download handler for this url
#---------------------------------------------------------------------
function url_bucketize() {
  $STD_DEBUG
  url_generic_apifunc bucketize "$@"
}

#---------------------------------------------------------------------
## @param url
##
## Parse the url somehow. The results are url specific right now;
## this is usually only called by dl handlers, who know what url types
## they can handle and thus understand the return value.
#---------------------------------------------------------------------
function url_crack() {
  $STD_DEBUG
  url_generic_apifunc crack "$@"
}

#---------------------------------------------------------------------
## @param url(s)
## @stdout urls
##
## Attempt to get more similar urls to the given one based on the
## sorcery mirrors files. Most url types simply expand to the input.
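##
## For example (expansion depends on the handler and mirrors files):
## <pre>
## url_expand http://machinename.com/path/to/file.tar.bz2
## </pre>
## echoes the url itself, possibly followed by equivalent mirror urls,
## one per line.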
#---------------------------------------------------------------------
function url_expand() {
  $STD_DEBUG
  url_generic_apifunc expand "$@"
}

#---------------------------------------------------------------------
## @param url
##
## Verify the url; this usually means going out to the internet and
## somehow determining if the link is good.
#---------------------------------------------------------------------
function url_verify() {
  $STD_DEBUG
  url_generic_apifunc verify "$@"
}

#---------------------------------------------------------------------
## @param url
## @Stdout url hostname
## @return 0 valid url
## @return 1 otherwise
## Takes a url and echoes the url hostname. Returns
## true if a hostname could be found, returns false otherwise.
##
#---------------------------------------------------------------------
function url_hostname() {
  $STD_DEBUG
  url_generic_apifunc hostname "$@"
}

#---------------------------------------------------------------------
## @param url
## @Stdout url netselect output
## @return 0 valid url
## @return 1 otherwise
## Prints the netselect output from the url handler's attempt at
## running netselect.
##
#---------------------------------------------------------------------
function url_netselect() {
  $STD_DEBUG
  url_generic_apifunc netselect "$@"
}

#---------------------------------------------------------------------
## @param url
## @return 0 valid url
## @return 1 otherwise
## Returns true if the given url is a valid url understood by the url
## library, returns false otherwise.
##
#---------------------------------------------------------------------
function url_is_valid() {
  $STD_DEBUG
  url_generic_apifunc is_valid "$@"
}


#---------------------------------------------------------------------
## @param function name
## @param url(s)
##
## This implements the common code for simple url handler inheritance.
## The url api functions above call this, which then calls the handler
## specific function if it exists, or the default version otherwise.
## This allows url handlers to override only the functions they need to.
##
## If multiple urls are given, the prefix of all of them is assumed
## to be the same.
##
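## For example, with a hypothetical xyz handler that defines only
## url_xyz_is_valid:
## <pre>
## url_generic_apifunc is_valid xyz://foo  # runs url_xyz_is_valid
## url_generic_apifunc expand   xyz://foo  # runs url_default_expand
## </pre>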
#---------------------------------------------------------------------
function url_generic_apifunc() {
  local tmp_func=$1
  shift
  local tmp_url=$1 tmp_prefix
  tmp_prefix=$(url_get_prefix $tmp_url) &&
  if misc_is_function url_${tmp_prefix}_${tmp_func}; then
    url_${tmp_prefix}_${tmp_func} "$@"
  else
    url_default_${tmp_func} "$@"
  fi
}

#---------------------------------------------------------------------
## This software is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This software is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this software; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#---------------------------------------------------------------------