#!/usr/bin/python
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
#
#
# fetch.py - a file download utility
#
# A simple program similar to wget(1), but handles local file copy, ignores
# directories, and verifies file hashes.
#

import os
import sys
import shutil
from urllib import splittype
from urllib2 import urlopen
from urllib2 import Request
import hashlib


def printIOError(e, txt):
    """ Decode and print an IOError-type exception. """
    print "I/O Error: " + txt + ": "
    try:
        (code, message) = e
        print str(message) + " (" + str(code) + ")"
    except:
        print str(e)


def validate(file, hash):
    """ Given a file-like object and a hash string of the form
        'algorithm:hexdigest', return a hash of the file contents in the
        same form (sha1 requests are computed as sha256 to force
        migration), or False if the algorithm is unknown. """
    algorithm, hashvalue = hash.split(':')

    # force migration away from sha1
    if algorithm == "sha1":
        algorithm = "sha256"

    try:
        m = hashlib.new(algorithm)
    except ValueError:
        return False

    while True:
        try:
            block = file.read()
        except IOError as err:
            print str(err),
            break

        m.update(block)
        if block == '':
            break

    return "%s:%s" % (algorithm, m.hexdigest())


def validate_container(filename, hash):
    """ Hash the raw contents of a file and return the result, or False
        if the file cannot be opened. """
    try:
        file = open(filename, 'r')
    except IOError as e:
        printIOError(e, "Can't open file " + filename)
        return False
    return validate(file, hash)


def validate_payload(filename, hash):
    """ Hash the uncompressed payload of a gzip or bzip2 archive and
        return the result, or False if the file is not a recognized
        archive or cannot be opened. """
    import re
    import gzip
    import bz2

    expr_bz = re.compile('.+\.bz2$', re.IGNORECASE)
    expr_gz = re.compile('.+\.gz$', re.IGNORECASE)
    expr_tgz = re.compile('.+\.tgz$', re.IGNORECASE)

    try:
        if expr_bz.match(filename):
            file = bz2.BZ2File(filename, 'r')
        elif expr_gz.match(filename):
            file = gzip.GzipFile(filename, 'r')
        elif expr_tgz.match(filename):
            file = gzip.GzipFile(filename, 'r')
        else:
            return False
    except IOError as e:
        printIOError(e, "Can't open archive " + filename)
        return False
    return validate(file, hash)


def download(url, filename=None, user_agent_arg=None):
    """ Download the contents of a URL to a local file.  Return the name
        of the file the data was written to, or None on failure. """
    src = None

    try:
        req = Request(url)
        if user_agent_arg != None:
            req.add_header("User-Agent", user_agent_arg)
        src = urlopen(req)
    except IOError as e:
        printIOError(e, "Can't open url " + url)
        return None

    # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessful action
    if src.getcode() and (3 <= int(src.getcode() / 100) <= 5):
        print "Error code: " + str(src.getcode())
        return None

    if filename == None:
        filename = src.geturl().split('/')[-1]

    try:
        dst = open(filename, 'wb')
    except IOError as e:
        printIOError(e, "Can't open file " + filename + " for writing")
        src.close()
        return None

    while True:
        block = src.read()
        if block == '':
            break
        dst.write(block)

    src.close()
    dst.close()

    # return the name of the file that we downloaded the data to.
    return filename
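

# Illustrative note (not part of the fetch logic): the -h/--hash argument and
# the strings returned by validate() use the "algorithm:hexdigest" form.  A
# matching value for a local archive could be generated along these lines,
# where the file name is only an example:
#
#   import hashlib
#   m = hashlib.sha256()
#   m.update(open("example.tar.gz", "rb").read())
#   print "sha256:" + m.hexdigest()
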

def download_paths(search, filename, url):
    """ Build the ordered list of candidate locations to fetch from: the
        local filename first, then each search directory (including any
        listed in $DOWNLOAD_SEARCH_PATH), and the command line url last. """
    urls = list()

    if filename != None:
        tmp = os.getenv('DOWNLOAD_SEARCH_PATH')
        if tmp:
            search += tmp.split(' ')

        file = os.path.basename(filename)

        urls = [base + '/' + file for base in search]

        # filename should always be first
        if filename in urls:
            urls.remove(filename)
        urls.insert(0, filename)

    # command line url is a fallback, so it's last
    if url != None and url not in urls:
        urls.append(url)

    return urls


def usage():
    print "Usage: %s [-a|--user-agent (user-agent)] [-f|--file (file)] " \
        "[-h|--hash (hash)] [-k|--keep] [-l|--link] " \
        "[-s|--search (search-dir)] -u|--url (url)" % \
        (sys.argv[0].split('/')[-1])
    sys.exit(1)


def main():
    import getopt

    # re-open stdout unbuffered so progress messages appear immediately
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    user_agent_arg = None
    file_arg = None
    link_arg = False
    keep_arg = False
    hash_arg = None
    url_arg = None
    search_list = list()

    try:
        opts, args = getopt.getopt(sys.argv[1:], "a:f:h:lks:u:",
            ["file=", "link", "keep", "hash=", "search=", "url=",
             "user-agent="])
    except getopt.GetoptError as err:
        print str(err)
        usage()

    for opt, arg in opts:
        if opt in ["-a", "--user-agent"]:
            user_agent_arg = arg
        elif opt in ["-f", "--file"]:
            file_arg = arg
        elif opt in ["-l", "--link"]:
            link_arg = True
        elif opt in ["-k", "--keep"]:
            keep_arg = True
        elif opt in ["-h", "--hash"]:
            hash_arg = arg
        elif opt in ["-s", "--search"]:
            search_list.append(arg)
        elif opt in ["-u", "--url"]:
            url_arg = arg
        else:
            assert False, "unknown option"

    if url_arg == None:
        usage()

    for url in download_paths(search_list, file_arg, url_arg):
        print "Source %s..." % url,

        scheme, path = splittype(url)
        name = file_arg

        if scheme in [None, 'file']:
            if os.path.exists(path) == False:
                print "not found, skipping file copy"
                continue
            elif name != path:
                if link_arg == False:
                    print "\n    copying..."
                    shutil.copy2(path, name)
                else:
                    print "\n    linking..."
                    os.symlink(path, name)
            else:
                pass
        elif scheme in ['http', 'https', 'ftp']:
            print "\n    downloading...",
            name = download(url, file_arg, user_agent_arg)
            if name == None:
                print "failed"
                continue

        print "\n    validating...",
        if hash_arg == None:
            print "skipping (no hash)"
            sys.exit(0)

        realhash = validate_container(name, hash_arg)
        if realhash == hash_arg:
            print "ok"
            sys.exit(0)
        else:
            payloadhash = validate_payload(name, hash_arg)
            if payloadhash == hash_arg:
                print "ok"
                sys.exit(0)

            print "corruption detected"
            print "    expected: %s" % hash_arg
            print "    actual:   %s" % realhash
            print "    payload:  %s" % payloadhash

        if keep_arg == False:
            try:
                print "\nWARN: Removing the corrupt downloaded file"
                os.remove(name)
            except OSError:
                pass
        else:
            print "\nINFO: Keeping the downloaded file because asked to"

    sys.exit(1)


if __name__ == "__main__":
    main()
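
# Example invocation (illustrative; the paths, URL and digest placeholder are
# hypothetical):
#
#   ./fetch.py --file /tmp/example.tar.gz \
#       --search /net/cache/archives \
#       --hash sha256:<hexdigest> \
#       --url http://example.com/example.tar.gz
#
# Candidate locations are tried in the order produced by download_paths():
# the local file first, then each search directory, then the URL itself.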