diff --git a/.gitignore b/.gitignore index d2d6f36..0f1f838 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ develop-eggs .installed.cfg lib lib64 +*idea # Installer logs pip-log.txt diff --git a/README.rst b/README.rst index 7cb85ab..7bf3a46 100644 --- a/README.rst +++ b/README.rst @@ -8,7 +8,7 @@ get_modis Description -------------- -This repository contains a Python script (and executable) that allows one to download MODIS data granules for different products and periods. +This repository contains a Python 3.5 script (and executable) that allows one to download MODIS data granules for different products, periods and non-reprojected images (with lat/lon coordinates). The code is quite simple and generic, and should work with most standard Python installations. @@ -38,6 +38,8 @@ issuing the ``-h`` or ``--help`` commands: collection number), the year, the MODIS reference tile and additionally, where to save the data to, and whether to verbose. The user may also select a temporal period in terms of days of year. + New: Now this program is compatible with Python 3.5 and it can download non-reprojected + MODIS images with lat/lon coordinates (now it parses the hdf associated xml for the bounding box). EXAMPLES @@ -72,6 +74,7 @@ issuing the ``-h`` or ``--help`` commands: MODIS product name with collection tag at the end (e.g. MOD09GA.005) --tile=TILE, -t TILE Required tile (h17v04, for example) + --coordinates=COORDINATES, -c COORDINATES Required bounding box in the form: lat,lon,lat,lon (upperleft, downright point) --year=YEAR, -y YEAR Year of interest --output=DIR_OUT, -o DIR_OUT Output directory @@ -89,5 +92,6 @@ Useful things to bear in mind: * The product must have an indication of the collection follwing the product name. i.e. ``MCD45A1.005``) * The ``--begin`` and ``--end`` flags are optional, and yu can ignore them if you just want the complete year * Use the ``--proxy`` option to set the required proxy. It should be read from the environment variable, but this is added flexiblity +* Use ``-t`` for TILE **OR** ``-c`` for COORDINATES NOT AT THE SAME TIME, otherwise it downloads only by coordinates The code has some logic not to download files several times, and the overall behaviour rests on the ``--quick`` flag: if this flag is **not** set, then the program will look at the remote available files and skip any files that are present and have the same file size as the remote. In some cases, this could lead to duplicities are re-processing takes place. If the ``--quick`` flag is set, then when the remote list of available dates is created, any present files that match the requested product and year will be ignored, irrespective of file size. This can mean that files that failed to download half way through will not the downloaded, but it's an overall faster process if a download failed halfway through a year. diff --git a/get_modis.py b/get_modis.py index 1ef5a94..44963c5 100755 --- a/get_modis.py +++ b/get_modis.py @@ -4,9 +4,9 @@ SYNOPSIS ./get_modis.py [-h,--help] [--verbose, -v] [--platform=PLATFORM, -s PLATFORM]\ - [--proxy=PROXY -p PROXY] \ - [--product=PRODUCT, -p PRODUCT] [--tile=TILE, -t TILE] \ - [--year=YEAR, -y YEAR] [--output=DIR_OUT, -o DIR_OUT] \ + [--proxy=PROXY -p PROXY] + [--product=PRODUCT, -p PRODUCT] [--tile=TILE, -t TILE] + [--year=YEAR, -y YEAR] [--output=DIR_OUT, -o DIR_OUT] [--begin=DOY_START, -b DOY_START] [--end=DOY_END, -e DOY_END] DESCRIPTION @@ -23,13 +23,13 @@ The following example downloads daily surface reflectance from the TERRA platform for tile h17v04 for 2004, between DoY 153 and 243: - $ ./get_modis.py -v -p MOD09GA.005 -s MOLT -y 2004 -t h17v04 -o /tmp/ \ + $ ./get_modis.py -v -p MOD09GA.005 -s MOLT -y 2004 -t h17v04 -o /tmp/ -b 153 -e 243 The script will also work with monthly or 8-daily composites. Here's how you download the monthly MCD45A1 (burned area) product for the same period: - $ ./get_modis.py -v -p MCD45A1.005 -s MOTA -y 2004 -t h17v04 -o /tmp/ \ + $ ./get_modis.py -v -p MCD45A1.005 -s MOTA -y 2004 -t h17v04 -o /tmp/ -b 153 -e 243 @@ -44,24 +44,27 @@ """ import optparse import os -import urllib2 +import urllib.request as urllib2 import time import calendar import shutil import logging import sys import fnmatch +from lxml import etree +from shapely.geometry import Polygon, MultiPoint -LOG = logging.getLogger( __name__ ) -OUT_HDLR = logging.StreamHandler( sys.stdout ) -OUT_HDLR.setFormatter( logging.Formatter( '%(asctime)s %(message)s') ) -OUT_HDLR.setLevel( logging.INFO ) -LOG.addHandler( OUT_HDLR ) -LOG.setLevel( logging.INFO ) +LOG = logging.getLogger(__name__) +OUT_HDLR = logging.StreamHandler(sys.stdout) +OUT_HDLR.setFormatter(logging.Formatter('%(asctime)s %(message)s')) +OUT_HDLR.setLevel(logging.INFO) +LOG.addHandler(OUT_HDLR) +LOG.setLevel(logging.INFO) -HEADERS = { 'User-Agent' : 'get_modis Python 1.3.0' } +HEADERS = {'User-Agent': 'get_modis Python 3.5.0'} -def parse_modis_dates ( url, dates, product, out_dir, ruff=False ): + +def parse_modis_dates(url, dates, product, out_dir, verbose, ruff=False): """Parse returned MODIS dates. This function gets the dates listing for a given MODIS products, and @@ -90,46 +93,75 @@ def parse_modis_dates ( url, dates, product, out_dir, ruff=False ): """ if ruff: product = product.split(".")[0] - already_here = fnmatch.filter ( os.listdir ( out_dir ), "%s*hdf" % product ) - already_here_dates = [ x.split(".")[-5][1:] \ - for x in already_here ] - - req = urllib2.Request ( "%s" % ( url ), None, HEADERS) - html = urllib2.urlopen(req).readlines() - + already_here = fnmatch.filter(os.listdir(out_dir), "%s*hdf" % product) + already_here_dates = [x.split(".")[-5][1:] for x in already_here] + + req = urllib2.Request("%s" % url, None, HEADERS) + html = str(urllib2.urlopen(req).readlines()) + lines = html.split('') + available_dates = [] - for line in html: - - if line.find ( "href" ) >= 0 and line.find ( "[DIR]" ) >= 0: + for line in lines: + if line.find("href") >= 0 and line.find("[DIR]") >= 0: # Points to a directory the_date = line.split('href="')[1].split('"')[0].strip("/") - + if ruff: try: - modis_date = time.strftime( "%Y%j", time.strptime( \ - the_date, "%Y.%m.%d") ) + modis_date = time.strftime("%Y%j", time.strptime(the_date, "%Y.%m.%d")) except ValueError: continue if modis_date in already_here_dates: continue else: - available_dates.append ( the_date ) + available_dates.append(the_date) else: - available_dates.append ( the_date ) - - - - dates = set ( dates ) - available_dates = set ( available_dates ) - suitable_dates = list( dates.intersection( available_dates ) ) + available_dates.append(the_date) + + dates = set(dates) + available_dates = set(available_dates) + suitable_dates = list(dates.intersection(available_dates)) suitable_dates.sort() + if verbose: + LOG.info(suitable_dates) return suitable_dates - - -def get_modisfiles ( platform, product, year, tile, proxy, \ - doy_start=1, doy_end = -1, \ - base_url="http://e4ftl01.cr.usgs.gov", out_dir=".", ruff=False, verbose=False ): + +def parse_modis_coordinates(url_xml, coordinates, verbose): + upperleft = (float(coordinates.split(',')[0]), float(coordinates.split(',')[1])) # lat, lon + downright = (float(coordinates.split(',')[2]), float(coordinates.split(',')[3])) # lat, lon + upperright = (upperleft[0], downright[1]) + downleft = (downright[0], upperleft[1]) + requested_bbox = Polygon((upperleft, upperright, downright, downleft)) + if verbose: + LOG.info("UL: LAT -> %s, LON -> %s" % upperleft) + LOG.info("DR: LAT -> %s, LON -> %s" % downright) + req = urllib2.Request("%s" % url_xml, None, HEADERS) + print(url_xml) + root = etree.parse(urllib2.urlopen(req)) + bbox = [] + for point in root.xpath('/GranuleMetaDataFile/GranuleURMetaData/SpatialDomainContainer/' + 'HorizontalSpatialDomainContainer/GPolygon/Boundary/Point'): + lon = point.xpath('./PointLongitude') + lat = point.xpath('./PointLatitude') + bbox.append((float(lat[0].text), float(lon[0].text))) + product_bbox = MultiPoint(bbox).convex_hull + if verbose: + for point in bbox: + (lat, lon) = point + LOG.info("Point: LAT -> %s LON -> %s" % (lat, lon)) + + if requested_bbox.intersects(product_bbox): + LOG.info("Compatible") + return True + else: + LOG.info("Not Compatible") + return False + + +def get_modisfiles(platform, product, year, tile, coordinates, proxy, + doy_start=1, doy_end=-1, + base_url="http://e4ftl01.cr.usgs.gov", out_dir=".", ruff=False, verbose=False): """Download MODIS products for a given tile, year & period of interest This function uses the `urllib2` module to download MODIS "granules" from @@ -154,8 +186,10 @@ def get_modisfiles ( platform, product, year, tile, proxy, \ The year of interest tile: str The tile (e.g., "h17v04") + coordinates: str + The coordinates of the upperleft and downright point of the requested bbox in the form 'lat,lon,lat,lon' proxy: dict - A proxy definition, such as {'http': 'http://127.0.0.1:8080', \ + A proxy definition, such as {'http': 'http://127.0.0.1:8080', 'ftp': ''}, etc. doy_start: int The starting day of the year. @@ -175,112 +209,126 @@ def get_modisfiles ( platform, product, year, tile, proxy, \ ------- Nothing """ - + if proxy is not None: - proxy = urllib2.ProxyHandler( proxy ) - opener = urllib2.build_opener( proxy ) - urllib2.install_opener( opener ) - - if not os.path.exists ( out_dir ): + proxy = urllib2.ProxyHandler(proxy) + opener = urllib2.build_opener(proxy) + urllib2.install_opener(opener) + + if not os.path.exists(out_dir): if verbose: - LOG.info("Creating outupt dir %s" % out_dir ) - os.makedirs ( out_dir ) + LOG.info("Creating outupt dir %s" % out_dir) + os.makedirs(out_dir) if doy_end == -1: - if calendar.isleap ( year ): + if calendar.isleap(year): doy_end = 367 else: doy_end = 366 - - dates = [time.strftime("%Y.%m.%d", time.strptime( "%d/%d" % ( i, year ), \ - "%j/%Y")) for i in xrange(doy_start, doy_end )] - url = "%s/%s/%s/" % ( base_url, platform, product ) - dates = parse_modis_dates ( url, dates, product, out_dir, ruff=ruff ) + + dates = [time.strftime("%Y.%m.%d", time.strptime("%d/%d" % (i, year), + "%j/%Y")) for i in range(doy_start, doy_end)] + url = "%s/%s/%s/" % (base_url, platform, product) + dates = parse_modis_dates(url, dates, product, out_dir, verbose, ruff=ruff) for date in dates: the_day_today = time.asctime().split()[0] - the_hour_now = int( time.asctime().split()[3].split(":")[0] ) + the_hour_now = int(time.asctime().split()[3].split(":")[0]) if the_day_today == "Wed" and 14 <= the_hour_now <= 17: - time.sleep ( 60*60*( 18-the_hour_now) ) - LOG.info ( "Sleeping for %d hours... Yawn!" % ( 18 - the_hour_now) ) - req = urllib2.Request ( "%s/%s" % ( url, date), None, HEADERS ) + time.sleep(60 * 60 * (18 - the_hour_now)) + LOG.info("Sleeping for %d hours... Yawn!" % (18 - the_hour_now)) + req = urllib2.Request("%s/%s" % (url, date), None, HEADERS) try: - html = urllib2.urlopen(req).readlines() + html = str(urllib2.urlopen(req).readlines()).split('') + for line in html: - if line.find( tile ) >=0 and line.find(".hdf") >= 0 and \ - line.find(".hdf.xml") < 0: + compatible = False + if tile is None and line.find(".hdf") >= 0 and line.find(".hdf.xml") < 0: fname = line.split("href=")[1].split(">")[0].strip('"') - req = urllib2.Request ( "%s/%s/%s" % ( url, date, fname), \ - None, HEADERS ) + compatible = parse_modis_coordinates("%s%s/%s.xml" % (url, date, fname), coordinates, verbose) + else: + if tile is not None and line.find(tile) >= 0 and line.find(".hdf") >= 0 and line.find( + ".hdf.xml") < 0: + fname = line.split("href=")[1].split(">")[0].strip('"') + compatible = True + + if compatible: + req = urllib2.Request("%s/%s/%s" % (url, date, fname), None, HEADERS) download = False - if not os.path.exists ( os.path.join( out_dir, fname ) ): + if not os.path.exists(os.path.join(out_dir, fname)): # File not present, download download = True else: the_remote_file = urllib2.urlopen(req) - remote_file_size = int ( \ - the_remote_file.headers.dict['content-length'] ) - local_file_size = os.path.getsize(os.path.join( \ - out_dir, fname ) ) + remote_file_size = int( + the_remote_file.headers['content-length']) + local_file_size = os.path.getsize(os.path.join( + out_dir, fname)) if remote_file_size != local_file_size: download = True - if download: if verbose: - LOG.info ( "Getting %s..... " % fname ) - with open ( os.path.join( out_dir, fname ), 'wb' ) \ - as local_file_fp: - shutil.copyfileobj(urllib2.urlopen(req), \ - local_file_fp) + LOG.info("Getting %s..... " % fname) + with open(os.path.join(out_dir, fname), 'wb') as local_file_fp: + shutil.copyfileobj(urllib2.urlopen(req), local_file_fp) if verbose: LOG.info("Done!") else: if verbose: - LOG.info ("File %s already present. Skipping" % \ - fname ) + LOG.info("File %s already present. Skipping" % fname) except urllib2.URLError: - LOG.info("Could not find data for %s(%s) for %s" % ( product, \ - platform, date )) + LOG.info("Could not find data for %s(%s) for %s" % (product, + platform, date)) if verbose: LOG.info("Completely finished downlading all there was") - + if __name__ == "__main__": - parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \ - usage=globals()['__doc__']) - parser.add_option ('-v', '--verbose', action='store_true', \ - default=False, help='verbose output') - parser.add_option ('-s', '--platform', action='store', dest="platform", \ - type=str, help='Platform type: MOLA, MOLT or MOTA' ) - parser.add_option ('-p', '--product', action='store', dest="product", \ - type=str, help="MODIS product name with collection tag at the end " + \ - "(e.g. MOD09GA.005)" ) - parser.add_option ('-t', '--tile', action="store", dest="tile", \ - type=str, help="Required tile (h17v04, for example)") - parser.add_option ( "-y", "--year", action="store", dest="year", \ - type=int, help="Year of interest" ) - parser.add_option('-o', '--output', action="store", dest="dir_out", \ - default=".", type=str, help="Output directory" ) - parser.add_option('-b', '--begin', action="store", dest="doy_start", \ - default=1, type=int, help="Starting day of year (DoY)" ) - parser.add_option('-e', '--end', action="store", dest="doy_end", \ - type=int, default=-1, help="Ending day of year (DoY)" ) - parser.add_option('-r', '--proxy', action="store", dest="proxy", \ - type=str, default=None, help="HTTP proxy URL" ) - parser.add_option('-q', '--quick', action="store_true", dest="quick", \ - default=False, help="Quick check to see whether files are present" ) + parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), + usage=globals()['__doc__']) + parser.add_option('-v', '--verbose', action='store_true', + default=False, help='verbose output') + parser.add_option('-s', '--platform', action='store', dest="platform", + type=str, help='Platform type: MOLA, MOLT or MOTA') + parser.add_option('-p', '--product', action='store', dest="product", + type=str, help="MODIS product name with collection tag at the end " + + "(e.g. MOD09GA.005)") + parser.add_option('-t', '--tile', action="store", dest="tile", + type=str, default=None, help="Required tile (h17v04, for example)") + parser.add_option('-c', '--cordinates', action="store", dest="coordinates", + type=str, default=None, help="Coordinates of non-reprojected tile upperleft " + "and downright point (lat,lon,lat,lon)") + parser.add_option('-y', '--year', action="store", dest="year", + type=int, help="Year of interest") + parser.add_option('-o', '--output', action="store", dest="dir_out", + default=".", type=str, help="Output directory") + parser.add_option('-b', '--begin', action="store", dest="doy_start", + default=1, type=int, help="Starting day of year (DoY)") + parser.add_option('-e', '--end', action="store", dest="doy_end", + type=int, default=-1, help="Ending day of year (DoY)") + parser.add_option('-r', '--proxy', action="store", dest="proxy", + type=str, default=None, help="HTTP proxy URL") + parser.add_option('-q', '--quick', action="store_true", dest="quick", + default=False, help="Quick check to see whether files are present") (options, args) = parser.parse_args() - if not ( options.platform in [ "MOLA", "MOTA", "MOLT" ] ) : - LOG.fatal ("`platform` has to be one of MOLA, MOTA, MOLT") + if not (options.platform in ["MOLA", "MOTA", "MOLT"]): + LOG.fatal("`platform` has to be one of MOLA, MOTA, MOLT") sys.exit(-1) if options.proxy is not None: - PROXY = { 'http': options.proxy } + PROXY = {'http': options.proxy} else: PROXY = None - - - - get_modisfiles ( options.platform, options.product, options.year, \ - options.tile, PROXY, \ - doy_start=options.doy_start, doy_end=options.doy_end, \ - out_dir=options.dir_out, \ - verbose=options.verbose, ruff=options.quick ) + if options.coordinates is None and options.tile is None: + LOG.fatal("You must insert coordinates or tile") + sys.exit(-1) + if options.coordinates is not None: + upperleft = (float(options.coordinates.split(',')[0]), float(options.coordinates.split(',')[1])) # lat, lon + downright = (float(options.coordinates.split(',')[2]), float(options.coordinates.split(',')[3])) # lat, lon + if downright[1] <= upperleft[1] or upperleft[0] <= downright[0]: + LOG.fatal("It seems that the coordinates are inverted.") + sys.exit(-1) + + get_modisfiles(options.platform, options.product, options.year, + options.tile, options.coordinates, PROXY, + doy_start=options.doy_start, doy_end=options.doy_end, + out_dir=options.dir_out, + verbose=options.verbose, ruff=options.quick)