Module geodata_harvester.getdata_silo

Python script to automatically download and crop climate data layers from SILO.

Functionalities: - download SILO data for custom time period and layer(s) as defined in dictionary - clip data to custom bounding box - save data as multi-band geotiff or netCDF

The SILO climate layers are described as dictionary in the module function get_silodict() and the SILO licensing and attribution are availabe with the module function getdict_license()

More details on the SILO climate variables can be found here: https://www.longpaddock.qld.gov.au/silo/about/climate-variables/ and more details about the gridded data structure here: https://www.longpaddock.qld.gov.au/silo/gridded-data/ and a data index: https://s3-ap-southeast-2.amazonaws.com/silo-open-data/Official/annual/index.html

This package is part of the Data Harvester project developed for the Agricultural Research Federation (AgReFed).

Copyright 2022 Sydney Informatics Hub (SIH), The University of Sydney

This open-source software is released under the LGPL-3.0 License.

Author: Sebastian Haan

Expand source code
"""
Python script to automatically download and crop climate data layers from SILO.

Functionalities:
- download SILO data for custom time period and layer(s) as defined in dictionary
- clip data to custom bounding box
- save data as multi-band geotiff or netCDF

The SILO climate layers are described as dictionary in the module function get_silodict()
and the SILO licensing and attribution are availabe with the module function getdict_license()

More details on the SILO climate variables can be found here:
https://www.longpaddock.qld.gov.au/silo/about/climate-variables/
and more details about the gridded data structure here:
https://www.longpaddock.qld.gov.au/silo/gridded-data/
and a data index:
https://s3-ap-southeast-2.amazonaws.com/silo-open-data/Official/annual/index.html

This package is part of the Data Harvester project developed for the Agricultural Research Federation (AgReFed).

Copyright 2022 Sydney Informatics Hub (SIH), The University of Sydney

This open-source software is released under the LGPL-3.0 License.

Author: Sebastian Haan
"""

import os
import shutil
import datetime
import requests
import numpy as np

# from urllib import request
from netCDF4 import Dataset
import rasterio
import rioxarray as rio
import xarray

from geodata_harvester import utils
from geodata_harvester.utils import spin

# from datacube.utils.cog import write_cog


def download_file(url, layername, year, outpath="."):
    """
    download file from url

    INPUT:
    url : str
    outpath : str

    OUTPUT:
    file : str
    """
    local_filename = os.path.join(outpath, url.split("/")[-1])
    if os.path.exists(local_filename):
        utils.msg_warn(f"{layername} for {year} already exists, skipping download")
        return local_filename
    with spin(f"Downloading {layername} for {year}") as s:
        with requests.get(url, stream=True) as r:
            with open(local_filename, "wb") as f:
                shutil.copyfileobj(r.raw, f)
        s(1)

    # with request.urlopen(url) as response:
    #     with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
    #         shutil.copyfileobj(response, tmp_file)
    # return tmp_file.name
    return local_filename


def get_silodict():
    """
    Get dictionary of available layers and meta data

    OUTPUT:
    layerdict : dict
        dictionary of meta data and available layer names
    """

    silodict = {}
    silodict["title"] = "SILO climate database"
    silodict[
        "description"
    ] = "SILO is containing continuous daily climate data for Australia from 1889 to present."
    silodict["crs"] = "EPSG:4326"
    silodict["bbox"] = [112.00, -44.00, 154.00, -10.00]
    silodict["resolution_arcsec"] = 180
    silodict["updates"] = "daily"
    silodict["layernames"] = {
        "daily_rain": "Daily rainfall, mm",
        "monthly_rain": "Monthly rainfall, mm",
        "max_temp": "Maximum temperature, degrees Celsius",
        "min_temp": "Minimum temperature, degrees Celsius",
        "vp": "Vapour pressure, hPa",
        "vp_deficit": "Vapour pressure deficit, hPa",
        "evap_pan": "Class A pan evaporation, mm",
        "evap_syn": "Synthetic estimate, mm",
        "evap_comb": "Combination: synthetic estimate pre-1970, class A pan 1970 onwards, mm",
        "evap_morton_lake": "Morton's shallow lake evaporation, mm",
        "radiation": "Solar radiation: Solar exposure, consisting of both direct and diffuse components, MJ/m2",
        "rh_tmax": "Relative humidity:  Relative humidity at the time of maximum temperature, %",
        "rh_tmin": "Relative humidity at the time of minimum temperature, %",
        "et_short_crop": "Evapotranspiration FAO564 short crop, mm",
        "et_tall_crop": "ASCE5 tall crop6, mm",
        "et_morton_actual": "Morton's areal actual evapotranspiration, mm",
        "et_morton_potential": "Morton's point potential evapotranspiration, mm",
        "et_morton_wet": "Morton's wet-environment areal potential evapotranspiration over land, mm",
        "mslp": "Mean sea level pressure Mean sea level pressure, hPa",
    }
    return silodict


def getdict_license():
    """
    Retrieves the SILO license and attribution information as dict
    """
    dict = {
        "name": "SILO Climate Data",
        "source_url": "https://www.longpaddock.qld.gov.au/silo/",
        "license": "CC BY 4.0",
        "license_title": "Creative Commons Attribution 4.0 International (CC BY 4.0)",
        "license_url": "https://creativecommons.org/licenses/by/4.0/",
        "copyright": "© State of Queensland (Queensland Department of Environment and Science) 2020.",
        "attribution": "State of Queensland (Queensland Department of Environment and Science) 2020.",
    }
    return dict


def xarray2tif(ds, outfname, layername):
    """
    Convert rio xarray dataset to multi-band geotiff with each time as separate band.

    TBD: optional: save separate tif each time slice

    INPUT:
    ds : xarray dataset
    outfname : str
        path+name of output file (".tif")

    OUTPUT:
    tif : str, name of multi-band geotiff
    """

    # create empty array
    dsnew = xarray.Dataset()

    for i in range(len(ds.time)):

        # We will the date of the image to name the GeoTIFF
        date = ds.isel(time=i).time.dt.strftime("%Y-%m-%d").data
        # print(f'Writing {date}')

        da = ds.isel(time=i)
        attr = ds.attrs
        dsnew[str(date)] = xarray.DataArray(
            da.variables[layername][:],
            dims=["lat", "lon"],
            coords={"lat": da.variables["lat"], "lon": da.variables["lon"]},
            name = str(date)     
        )
        dsnew[str(date)].attrs = {'long_name': str(date)}

        # Write GeoTIFF with datacube libary for cloud optimized GeoTIFFs (COG)
        # write_cog(geo_im=singletimestamp_da,
        #         fname=os.path.join(outpath,f'{outfname_base}_{date}.tif',
        #         overwrite=True)

    
    # Set crs
    dsnew.rio.write_crs(4326, inplace=True)

    # Write GeoTIFF to disk
    dsnew.rio.to_raster(outfname)


def get_SILO_layers(
    layernames,
    date_start,
    date_end,
    outpath,
    bbox=None,
    format_out="tif",
    delete_tempfiles=False,
    verbose=False,
    ):
    """
    Get raster data from SILO for certain climate variable and save data as geotif.
    If multiple times are requested, then each time will be saved in on band of multi-band geotif.
    All layers are available with daily resolution (except 'monthly_rain')

    This function includes validation of years and automatically download of data from SILO in temporary folder.

    Input:
        layernames : list climate variable names (see below)
        date_start : str, start date of time series in format 'YYYY-MM-DD'
        date_end : str, end date of time series in format 'YYYY-MM-DD'
        outpath : str, path to save output data
        bbox : list of bounding box coordinates (optional)
        format_out : str, format of output data: either 'NetCDF' (nc) or 'GeoTIFF' (tif)
        delete_tempfiles : bool, delete temporary files after processing

    Returns:
        fnames_out : list of output filenames

    layer names:
        - 'daily_rain' (Daily rainfall, mm)
        - 'monthly_rain' (Monthly rainfall, mm)
        - 'max_temp' (Maximum temperature, deg C)
        - 'min_temp'  (Minimum temperature. deg C)
        - 'vp' (Vapour pressure, hPa)
        - 'vp_deficit' (Vapour pressure deficit, hPa)
        - 'evap_pan' (Class A pan evaporation, mm)
        - 'evap_syn' (Synthetic estimate, mm)
        - 'evap_comb' (Combination: synthetic estimate pre-1970, class A pan 1970 onwards, mm)
        - 'evap_morton_lake' (Morton's shallow lake evaporation, mm)
        - 'radiation'   (Solar radiation: Solar exposure, consisting of both direct and diffuse components, MJ/m2)
        - 'rh_tmax'     (Relative humidity:     Relative humidity at the time of maximum temperature, %)
        - 'rh_tmin'     (Relative humidity at the time of minimum temperature, %)
        - 'et_short_crop' (Evapotranspiration FAO564 short crop, mm)
        - 'et_tall_crop' (ASCE5 tall crop6, mm)
        - 'et_morton_actual' (Morton's areal actual evapotranspiration, mm)
        - 'et_morton_potential' (Morton's point potential evapotranspiration, mm)
        - 'et_morton_wet' (Morton's wet-environment areal potential evapotranspiration over land, mm)
        - 'mslp' (Mean sea level pressure Mean sea level pressure, hPa)

    For more details see:
    SILO data structure doc for gridded data:
    https://www.longpaddock.qld.gov.au/silo/gridded-data/

    Notes: Here we use the SILO annual raster API to download the data and then trim to date range. 
    For data ranges much smaller than a year, one could also use the SILO daily raster API and combine dates.
    """

    # define outpath for temporary files
    outpath_temp = os.path.join(outpath, 'temp_silo')
    os.makedirs(outpath, exist_ok=True)
    date_start = str(date_start)
    date_end = str(date_end)
    years = np.arange(int(date_start[:4]), int(date_end[:4]) + 1).tolist()
    fnames_out_silo = []
    
    for layername in layernames:

        # run the download
        fnames_out = get_SILO_raster(
            layername, 
            years, 
            outpath_temp, 
            bbox = bbox, 
            format_out = 'nc', 
            delete_temp = False)

        # process the data into a single file and trim to date range
        if (format_out == 'tif') | (format_out == 'GeoTIFF'):
            outfname = os.path.join(outpath, "silo_" + layername + "_" + date_start + "-" + date_end + ".tif")
        elif (format_out == 'NetCDF') | (format_out == 'nc'):
            outfname = os.path.join(outpath, "silo_" + layername + "_" + date_start + "-" + date_end + ".nc")
        else:
            raise ValueError("format_out must be either 'tif' or 'nc'")

        # Combine all years into one file and trim to date range
        process_raster_daterange(fnames_out, date_start, date_end, outfname, layername)

        # Delete temporary cropped files (not the downloaded file)
        for fname in fnames_out:
            os.remove(fname)
    
        #Save the layer name
        # Check if outfname is list or string
        if isinstance(outfname, list):
            fnames_out_silo += outfname
        else:
            fnames_out_silo.append(outfname)

    return fnames_out_silo
    



def process_raster_daterange(infnames, date_start, date_end, outfname, layername):
    """ Combines all the raster data into one xarray dataset, trimming to the date range, 
    and saves it as a multiband tif file.

    Input:
        infnames : list of input filenames
        date_start : str, start date of time series in format 'YYYY-MM-DD'
        date_end : str, end date of time series in format 'YYYY-MM-DD'
        outfname : str, path+name of output file
    """
    # Read all the raster data into one xarray dataset
    try:
        # Requires dask installed
        ds = xarray.open_mfdataset(infnames, combine="by_coords")
    except:
        # If dask is not installed, merge it this way (not as memory efficient)
        ds = xarray.merge([xarray.open_dataset(f) for f in infnames])
    # Clip to date range
    ds = ds.sel(time=slice(date_start, date_end))
    # Save file
    # if file extension is tif, save as 
    if outfname.endswith('.tif'):
        xarray2tif(ds, outfname, layername)
    if outfname.endswith('.nc'):
        ds.to_netcdf(outfname)
    # Close file
    ds.close()



def get_SILO_raster(
    layername,
    years,
    outpath,
    bbox=None,
    format_out="nc",
    delete_temp=False,
    verbose=False,
    ):
    """
    Get raster data from SILO for certain climate variable and save data as geotif.
    If multiple times are requested, then each time will be saved in on band of multi-band geotif.
    All layers are available with daily resolution (except 'monthly_rain')

    This function includes validation of years and automatically download of data from SILO in temporary folder.

    Input:
        layername : str, climate variable name (see below)
        years : list of years
        outpath : str, path to save output data
        bbox : list of bounding box coordinates (optional)
        format_out : str, format of output data: either 'nc' (netCDF) or 'tif' (geotiff)
        delete_temp : bool, delete temporary folder after download

    Returns:
        fnames_out : list of output filenames



    layer names:
        - 'daily_rain' (Daily rainfall, mm)
        - 'monthly_rain' (Monthly rainfall, mm)
        - 'max_temp' (Maximum temperature, deg C)
        - 'min_temp'  (Minimum temperature. deg C)
        - 'vp' (Vapour pressure, hPa)
        - 'vp_deficit' (Vapour pressure deficit, hPa)
        - 'evap_pan' (Class A pan evaporation, mm)
        - 'evap_syn' (Synthetic estimate, mm)
        - 'evap_comb' (Combination: synthetic estimate pre-1970, class A pan 1970 onwards, mm)
        - 'evap_morton_lake' (Morton's shallow lake evaporation, mm)
        - 'radiation'   (Solar radiation: Solar exposure, consisting of both direct and diffuse components, MJ/m2)
        - 'rh_tmax'     (Relative humidity:     Relative humidity at the time of maximum temperature, %)
        - 'rh_tmin'     (Relative humidity at the time of minimum temperature, %)
        - 'et_short_crop' (Evapotranspiration FAO564 short crop, mm)
        - 'et_tall_crop' (ASCE5 tall crop6, mm)
        - 'et_morton_actual' (Morton's areal actual evapotranspiration, mm)
        - 'et_morton_potential' (Morton's point potential evapotranspiration, mm)
        - 'et_morton_wet' (Morton's wet-environment areal potential evapotranspiration over land, mm)
        - 'mslp' (Mean sea level pressure Mean sea level pressure, hPa)

    For more details see:
    SILO data structure doc for gridded data:
    https://www.longpaddock.qld.gov.au/silo/gridded-data/

    SILO url structure:
    url = "https://s3-ap-southeast-2.amazonaws.com/silo-open-data/Official/annual/<variable>/<year>.<variable>.nc
    e.g. url = "https://s3-ap-southeast-2.amazonaws.com/silo-open-data/Official/annual/monthly_rain/2005.monthly_rain.nc"
    """

    # Check if layername is valid
    silodict = get_silodict()
    layerdict = silodict["layernames"]
    if layername not in layerdict:
        raise ValueError(f"Layer name {layername} not valid. Choose from: {str(layerdict.keys())}")

    # Create output folder
    os.makedirs(outpath, exist_ok=True)

    # Check if years are valid
    if not (isinstance(years, tuple) | isinstance(years, list)):
        # If not a list, make it a list
        years = [years]
    # Get current year from datetime
    current_year = int(datetime.datetime.now().year)

    # Check if format is valid
    if format_out not in ["nc", "tif", "NetCDF", "GeoTIFF"]:
        raise ValueError("Output format not valid. Choose from: 'NetCDF'' or 'GeoTIFF'")

    # Check if years are in the range of available years
    url_info = "https://www.longpaddock.qld.gov.au/silo/gridded-data/"
    for year in years:
        if year > current_year:
            raise ValueError(f"Choose years <= {current_year}")
            return False
        if year < 1889:
            print(f"data is not available for years < 1889.")
            print(f"see for more details: {url_info}")
            return False
        if (year < 1970) & (layername == "evap_pan"):
            print(
                f"{layername} is not available for years < 1970. Automatically set to evap_comb"
            )
            print(f"see for more details: {url_info}")
            layername = "evap_comb"
        if (year < 1957) & (layername == "mslp"):
            print(f"{layername} is not available for years < 1957.")
            print(f"see for more details: {url_info}")
            return False

    # Silo base url
    silo_baseurl = (
        "https://s3-ap-southeast-2.amazonaws.com/silo-open-data/Official/annual/"
    )

    fnames_out = []
    # Download data for each year and save as geotif
    for year in years:
        # Get url
        url = silo_baseurl + layername + "/" + str(year) + "." + layername + ".nc"
        # Download file
        # print(f'Downloading data for year {year} from {url} ...')
        filename = download_file(url, layername, year, outpath)

        # Open file in Xarray
        ds = xarray.open_dataset(filename)
        # select data in bbox:
        if bbox is not None:
            ds = ds.sel(lon=slice(bbox[0], bbox[2]), lat=slice(bbox[1], bbox[3]))
        # Save data
        if (format_out == "nc") | (format_out == "NetCDF"):
            # Save netCDF file
            outfname = layername + "_" + str(year) + "_cropped.nc"
            outfile = os.path.join(outpath, outfname)
            ds.to_netcdf(outfile)
        elif (format_out == "tif") | (format_out == "GeoTIFF"):
            # Save as multi-band geotiff file
            outfname = layername + "_" + str(year) + "_cropped.tif"
            xarray2tif(ds, os.path.join(outpath, outfname), layername)
        # Close file
        ds.close()
        # Remove file
        if delete_temp:
            os.remove(filename)
        # print("Saved " + layername + " for year " + str(year) + " as geotif: ")
        # print(os.path.join(outpath,outfname))
        fnames_out.append(os.path.join(outpath, outfname))

    # Convert netcdf to geotif
    # nc_to_geotif(filename, outpath, layername, year)
    # https://pratiman-91.github.io/2020/08/01/NetCDF-to-GeoTIFF-using-Python.html
    return fnames_out

### Test function ###

def test_get_SILO_raster():
    """
    test script
    """
    layername = "daily_rain"
    years = 2019
    outpath = "silo_test"
    bbox = (130, -44, 153.9, -11)
    # test first for tif output format
    format_out = "tif"
    fnames_out = get_SILO_raster(layername, years, outpath, bbox, format_out)
    assert len(fnames_out) > 0


def test_get_SILO_layers():
    """
    test script
    """
    layernames = ["daily_rain","max_temp"]
    date_start = '2019-01-01'
    date_end = '2020-02-01'
    outpath = "silo_test"
    bbox = (130, -44, 153.9, -11)
    # test first for tif output format
    format_out = "tif"
    fnames_out = get_SILO_layers(layernames, date_start, date_end, outpath, bbox, format_out)
    assert len(fnames_out) > 0

Functions

def download_file(url, layername, year, outpath='.')

download file from url

INPUT: url : str outpath : str

OUTPUT: file : str

Expand source code
def download_file(url, layername, year, outpath="."):
    """
    download file from url

    INPUT:
    url : str
    outpath : str

    OUTPUT:
    file : str
    """
    local_filename = os.path.join(outpath, url.split("/")[-1])
    if os.path.exists(local_filename):
        utils.msg_warn(f"{layername} for {year} already exists, skipping download")
        return local_filename
    with spin(f"Downloading {layername} for {year}") as s:
        with requests.get(url, stream=True) as r:
            with open(local_filename, "wb") as f:
                shutil.copyfileobj(r.raw, f)
        s(1)

    # with request.urlopen(url) as response:
    #     with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
    #         shutil.copyfileobj(response, tmp_file)
    # return tmp_file.name
    return local_filename
def get_SILO_layers(layernames, date_start, date_end, outpath, bbox=None, format_out='tif', delete_tempfiles=False, verbose=False)

Get raster data from SILO for certain climate variable and save data as geotif. If multiple times are requested, then each time will be saved in on band of multi-band geotif. All layers are available with daily resolution (except 'monthly_rain')

This function includes validation of years and automatically download of data from SILO in temporary folder.

Input

layernames : list climate variable names (see below) date_start : str, start date of time series in format 'YYYY-MM-DD' date_end : str, end date of time series in format 'YYYY-MM-DD' outpath : str, path to save output data bbox : list of bounding box coordinates (optional) format_out : str, format of output data: either 'NetCDF' (nc) or 'GeoTIFF' (tif) delete_tempfiles : bool, delete temporary files after processing

Returns

fnames_out
list of output filenames

layer names: - 'daily_rain' (Daily rainfall, mm) - 'monthly_rain' (Monthly rainfall, mm) - 'max_temp' (Maximum temperature, deg C) - 'min_temp' (Minimum temperature. deg C) - 'vp' (Vapour pressure, hPa) - 'vp_deficit' (Vapour pressure deficit, hPa) - 'evap_pan' (Class A pan evaporation, mm) - 'evap_syn' (Synthetic estimate, mm) - 'evap_comb' (Combination: synthetic estimate pre-1970, class A pan 1970 onwards, mm) - 'evap_morton_lake' (Morton's shallow lake evaporation, mm) - 'radiation' (Solar radiation: Solar exposure, consisting of both direct and diffuse components, MJ/m2) - 'rh_tmax' (Relative humidity: Relative humidity at the time of maximum temperature, %) - 'rh_tmin' (Relative humidity at the time of minimum temperature, %) - 'et_short_crop' (Evapotranspiration FAO564 short crop, mm) - 'et_tall_crop' (ASCE5 tall crop6, mm) - 'et_morton_actual' (Morton's areal actual evapotranspiration, mm) - 'et_morton_potential' (Morton's point potential evapotranspiration, mm) - 'et_morton_wet' (Morton's wet-environment areal potential evapotranspiration over land, mm) - 'mslp' (Mean sea level pressure Mean sea level pressure, hPa)

For more details see: SILO data structure doc for gridded data: https://www.longpaddock.qld.gov.au/silo/gridded-data/

Notes: Here we use the SILO annual raster API to download the data and then trim to date range. For data ranges much smaller than a year, one could also use the SILO daily raster API and combine dates.

Expand source code
def get_SILO_layers(
    layernames,
    date_start,
    date_end,
    outpath,
    bbox=None,
    format_out="tif",
    delete_tempfiles=False,
    verbose=False,
    ):
    """
    Get raster data from SILO for certain climate variable and save data as geotif.
    If multiple times are requested, then each time will be saved in on band of multi-band geotif.
    All layers are available with daily resolution (except 'monthly_rain')

    This function includes validation of years and automatically download of data from SILO in temporary folder.

    Input:
        layernames : list climate variable names (see below)
        date_start : str, start date of time series in format 'YYYY-MM-DD'
        date_end : str, end date of time series in format 'YYYY-MM-DD'
        outpath : str, path to save output data
        bbox : list of bounding box coordinates (optional)
        format_out : str, format of output data: either 'NetCDF' (nc) or 'GeoTIFF' (tif)
        delete_tempfiles : bool, delete temporary files after processing

    Returns:
        fnames_out : list of output filenames

    layer names:
        - 'daily_rain' (Daily rainfall, mm)
        - 'monthly_rain' (Monthly rainfall, mm)
        - 'max_temp' (Maximum temperature, deg C)
        - 'min_temp'  (Minimum temperature. deg C)
        - 'vp' (Vapour pressure, hPa)
        - 'vp_deficit' (Vapour pressure deficit, hPa)
        - 'evap_pan' (Class A pan evaporation, mm)
        - 'evap_syn' (Synthetic estimate, mm)
        - 'evap_comb' (Combination: synthetic estimate pre-1970, class A pan 1970 onwards, mm)
        - 'evap_morton_lake' (Morton's shallow lake evaporation, mm)
        - 'radiation'   (Solar radiation: Solar exposure, consisting of both direct and diffuse components, MJ/m2)
        - 'rh_tmax'     (Relative humidity:     Relative humidity at the time of maximum temperature, %)
        - 'rh_tmin'     (Relative humidity at the time of minimum temperature, %)
        - 'et_short_crop' (Evapotranspiration FAO564 short crop, mm)
        - 'et_tall_crop' (ASCE5 tall crop6, mm)
        - 'et_morton_actual' (Morton's areal actual evapotranspiration, mm)
        - 'et_morton_potential' (Morton's point potential evapotranspiration, mm)
        - 'et_morton_wet' (Morton's wet-environment areal potential evapotranspiration over land, mm)
        - 'mslp' (Mean sea level pressure Mean sea level pressure, hPa)

    For more details see:
    SILO data structure doc for gridded data:
    https://www.longpaddock.qld.gov.au/silo/gridded-data/

    Notes: Here we use the SILO annual raster API to download the data and then trim to date range. 
    For data ranges much smaller than a year, one could also use the SILO daily raster API and combine dates.
    """

    # define outpath for temporary files
    outpath_temp = os.path.join(outpath, 'temp_silo')
    os.makedirs(outpath, exist_ok=True)
    date_start = str(date_start)
    date_end = str(date_end)
    years = np.arange(int(date_start[:4]), int(date_end[:4]) + 1).tolist()
    fnames_out_silo = []
    
    for layername in layernames:

        # run the download
        fnames_out = get_SILO_raster(
            layername, 
            years, 
            outpath_temp, 
            bbox = bbox, 
            format_out = 'nc', 
            delete_temp = False)

        # process the data into a single file and trim to date range
        if (format_out == 'tif') | (format_out == 'GeoTIFF'):
            outfname = os.path.join(outpath, "silo_" + layername + "_" + date_start + "-" + date_end + ".tif")
        elif (format_out == 'NetCDF') | (format_out == 'nc'):
            outfname = os.path.join(outpath, "silo_" + layername + "_" + date_start + "-" + date_end + ".nc")
        else:
            raise ValueError("format_out must be either 'tif' or 'nc'")

        # Combine all years into one file and trim to date range
        process_raster_daterange(fnames_out, date_start, date_end, outfname, layername)

        # Delete temporary cropped files (not the downloaded file)
        for fname in fnames_out:
            os.remove(fname)
    
        #Save the layer name
        # Check if outfname is list or string
        if isinstance(outfname, list):
            fnames_out_silo += outfname
        else:
            fnames_out_silo.append(outfname)

    return fnames_out_silo
def get_SILO_raster(layername, years, outpath, bbox=None, format_out='nc', delete_temp=False, verbose=False)

Get raster data from SILO for certain climate variable and save data as geotif. If multiple times are requested, then each time will be saved in on band of multi-band geotif. All layers are available with daily resolution (except 'monthly_rain')

This function includes validation of years and automatically download of data from SILO in temporary folder.

Input

layername : str, climate variable name (see below) years : list of years outpath : str, path to save output data bbox : list of bounding box coordinates (optional) format_out : str, format of output data: either 'nc' (netCDF) or 'tif' (geotiff) delete_temp : bool, delete temporary folder after download

Returns

fnames_out
list of output filenames

layer names: - 'daily_rain' (Daily rainfall, mm) - 'monthly_rain' (Monthly rainfall, mm) - 'max_temp' (Maximum temperature, deg C) - 'min_temp' (Minimum temperature. deg C) - 'vp' (Vapour pressure, hPa) - 'vp_deficit' (Vapour pressure deficit, hPa) - 'evap_pan' (Class A pan evaporation, mm) - 'evap_syn' (Synthetic estimate, mm) - 'evap_comb' (Combination: synthetic estimate pre-1970, class A pan 1970 onwards, mm) - 'evap_morton_lake' (Morton's shallow lake evaporation, mm) - 'radiation' (Solar radiation: Solar exposure, consisting of both direct and diffuse components, MJ/m2) - 'rh_tmax' (Relative humidity: Relative humidity at the time of maximum temperature, %) - 'rh_tmin' (Relative humidity at the time of minimum temperature, %) - 'et_short_crop' (Evapotranspiration FAO564 short crop, mm) - 'et_tall_crop' (ASCE5 tall crop6, mm) - 'et_morton_actual' (Morton's areal actual evapotranspiration, mm) - 'et_morton_potential' (Morton's point potential evapotranspiration, mm) - 'et_morton_wet' (Morton's wet-environment areal potential evapotranspiration over land, mm) - 'mslp' (Mean sea level pressure Mean sea level pressure, hPa)

For more details see: SILO data structure doc for gridded data: https://www.longpaddock.qld.gov.au/silo/gridded-data/

SILO url structure: url = "https://s3-ap-southeast-2.amazonaws.com/silo-open-data/Official/annual//..nc e.g. url = "https://s3-ap-southeast-2.amazonaws.com/silo-open-data/Official/annual/monthly_rain/2005.monthly_rain.nc"

Expand source code
def get_SILO_raster(
    layername,
    years,
    outpath,
    bbox=None,
    format_out="nc",
    delete_temp=False,
    verbose=False,
    ):
    """
    Get raster data from SILO for certain climate variable and save data as geotif.
    If multiple times are requested, then each time will be saved in on band of multi-band geotif.
    All layers are available with daily resolution (except 'monthly_rain')

    This function includes validation of years and automatically download of data from SILO in temporary folder.

    Input:
        layername : str, climate variable name (see below)
        years : list of years
        outpath : str, path to save output data
        bbox : list of bounding box coordinates (optional)
        format_out : str, format of output data: either 'nc' (netCDF) or 'tif' (geotiff)
        delete_temp : bool, delete temporary folder after download

    Returns:
        fnames_out : list of output filenames



    layer names:
        - 'daily_rain' (Daily rainfall, mm)
        - 'monthly_rain' (Monthly rainfall, mm)
        - 'max_temp' (Maximum temperature, deg C)
        - 'min_temp'  (Minimum temperature. deg C)
        - 'vp' (Vapour pressure, hPa)
        - 'vp_deficit' (Vapour pressure deficit, hPa)
        - 'evap_pan' (Class A pan evaporation, mm)
        - 'evap_syn' (Synthetic estimate, mm)
        - 'evap_comb' (Combination: synthetic estimate pre-1970, class A pan 1970 onwards, mm)
        - 'evap_morton_lake' (Morton's shallow lake evaporation, mm)
        - 'radiation'   (Solar radiation: Solar exposure, consisting of both direct and diffuse components, MJ/m2)
        - 'rh_tmax'     (Relative humidity:     Relative humidity at the time of maximum temperature, %)
        - 'rh_tmin'     (Relative humidity at the time of minimum temperature, %)
        - 'et_short_crop' (Evapotranspiration FAO564 short crop, mm)
        - 'et_tall_crop' (ASCE5 tall crop6, mm)
        - 'et_morton_actual' (Morton's areal actual evapotranspiration, mm)
        - 'et_morton_potential' (Morton's point potential evapotranspiration, mm)
        - 'et_morton_wet' (Morton's wet-environment areal potential evapotranspiration over land, mm)
        - 'mslp' (Mean sea level pressure Mean sea level pressure, hPa)

    For more details see:
    SILO data structure doc for gridded data:
    https://www.longpaddock.qld.gov.au/silo/gridded-data/

    SILO url structure:
    url = "https://s3-ap-southeast-2.amazonaws.com/silo-open-data/Official/annual/<variable>/<year>.<variable>.nc
    e.g. url = "https://s3-ap-southeast-2.amazonaws.com/silo-open-data/Official/annual/monthly_rain/2005.monthly_rain.nc"
    """

    # Check if layername is valid
    silodict = get_silodict()
    layerdict = silodict["layernames"]
    if layername not in layerdict:
        raise ValueError(f"Layer name {layername} not valid. Choose from: {str(layerdict.keys())}")

    # Create output folder
    os.makedirs(outpath, exist_ok=True)

    # Check if years are valid
    if not (isinstance(years, tuple) | isinstance(years, list)):
        # If not a list, make it a list
        years = [years]
    # Get current year from datetime
    current_year = int(datetime.datetime.now().year)

    # Check if format is valid
    if format_out not in ["nc", "tif", "NetCDF", "GeoTIFF"]:
        raise ValueError("Output format not valid. Choose from: 'NetCDF'' or 'GeoTIFF'")

    # Check if years are in the range of available years
    url_info = "https://www.longpaddock.qld.gov.au/silo/gridded-data/"
    for year in years:
        if year > current_year:
            raise ValueError(f"Choose years <= {current_year}")
            return False
        if year < 1889:
            print(f"data is not available for years < 1889.")
            print(f"see for more details: {url_info}")
            return False
        if (year < 1970) & (layername == "evap_pan"):
            print(
                f"{layername} is not available for years < 1970. Automatically set to evap_comb"
            )
            print(f"see for more details: {url_info}")
            layername = "evap_comb"
        if (year < 1957) & (layername == "mslp"):
            print(f"{layername} is not available for years < 1957.")
            print(f"see for more details: {url_info}")
            return False

    # Silo base url
    silo_baseurl = (
        "https://s3-ap-southeast-2.amazonaws.com/silo-open-data/Official/annual/"
    )

    fnames_out = []
    # Download data for each year and save as geotif
    for year in years:
        # Get url
        url = silo_baseurl + layername + "/" + str(year) + "." + layername + ".nc"
        # Download file
        # print(f'Downloading data for year {year} from {url} ...')
        filename = download_file(url, layername, year, outpath)

        # Open file in Xarray
        ds = xarray.open_dataset(filename)
        # select data in bbox:
        if bbox is not None:
            ds = ds.sel(lon=slice(bbox[0], bbox[2]), lat=slice(bbox[1], bbox[3]))
        # Save data
        if (format_out == "nc") | (format_out == "NetCDF"):
            # Save netCDF file
            outfname = layername + "_" + str(year) + "_cropped.nc"
            outfile = os.path.join(outpath, outfname)
            ds.to_netcdf(outfile)
        elif (format_out == "tif") | (format_out == "GeoTIFF"):
            # Save as multi-band geotiff file
            outfname = layername + "_" + str(year) + "_cropped.tif"
            xarray2tif(ds, os.path.join(outpath, outfname), layername)
        # Close file
        ds.close()
        # Remove file
        if delete_temp:
            os.remove(filename)
        # print("Saved " + layername + " for year " + str(year) + " as geotif: ")
        # print(os.path.join(outpath,outfname))
        fnames_out.append(os.path.join(outpath, outfname))

    # Convert netcdf to geotif
    # nc_to_geotif(filename, outpath, layername, year)
    # https://pratiman-91.github.io/2020/08/01/NetCDF-to-GeoTIFF-using-Python.html
    return fnames_out
def get_silodict()

Get dictionary of available layers and meta data

OUTPUT: layerdict : dict dictionary of meta data and available layer names

Expand source code
def get_silodict():
    """
    Get dictionary of available layers and meta data

    OUTPUT:
    layerdict : dict
        dictionary of meta data and available layer names
    """

    silodict = {}
    silodict["title"] = "SILO climate database"
    silodict[
        "description"
    ] = "SILO is containing continuous daily climate data for Australia from 1889 to present."
    silodict["crs"] = "EPSG:4326"
    silodict["bbox"] = [112.00, -44.00, 154.00, -10.00]
    silodict["resolution_arcsec"] = 180
    silodict["updates"] = "daily"
    silodict["layernames"] = {
        "daily_rain": "Daily rainfall, mm",
        "monthly_rain": "Monthly rainfall, mm",
        "max_temp": "Maximum temperature, degrees Celsius",
        "min_temp": "Minimum temperature, degrees Celsius",
        "vp": "Vapour pressure, hPa",
        "vp_deficit": "Vapour pressure deficit, hPa",
        "evap_pan": "Class A pan evaporation, mm",
        "evap_syn": "Synthetic estimate, mm",
        "evap_comb": "Combination: synthetic estimate pre-1970, class A pan 1970 onwards, mm",
        "evap_morton_lake": "Morton's shallow lake evaporation, mm",
        "radiation": "Solar radiation: Solar exposure, consisting of both direct and diffuse components, MJ/m2",
        "rh_tmax": "Relative humidity:  Relative humidity at the time of maximum temperature, %",
        "rh_tmin": "Relative humidity at the time of minimum temperature, %",
        "et_short_crop": "Evapotranspiration FAO564 short crop, mm",
        "et_tall_crop": "ASCE5 tall crop6, mm",
        "et_morton_actual": "Morton's areal actual evapotranspiration, mm",
        "et_morton_potential": "Morton's point potential evapotranspiration, mm",
        "et_morton_wet": "Morton's wet-environment areal potential evapotranspiration over land, mm",
        "mslp": "Mean sea level pressure Mean sea level pressure, hPa",
    }
    return silodict
def getdict_license()

Retrieves the SILO license and attribution information as dict

Expand source code
def getdict_license():
    """
    Retrieves the SILO license and attribution information as dict
    """
    dict = {
        "name": "SILO Climate Data",
        "source_url": "https://www.longpaddock.qld.gov.au/silo/",
        "license": "CC BY 4.0",
        "license_title": "Creative Commons Attribution 4.0 International (CC BY 4.0)",
        "license_url": "https://creativecommons.org/licenses/by/4.0/",
        "copyright": "© State of Queensland (Queensland Department of Environment and Science) 2020.",
        "attribution": "State of Queensland (Queensland Department of Environment and Science) 2020.",
    }
    return dict
def process_raster_daterange(infnames, date_start, date_end, outfname, layername)

Combines all the raster data into one xarray dataset, trimming to the date range, and saves it as a multiband tif file.

Input

infnames : list of input filenames date_start : str, start date of time series in format 'YYYY-MM-DD' date_end : str, end date of time series in format 'YYYY-MM-DD' outfname : str, path+name of output file

Expand source code
def process_raster_daterange(infnames, date_start, date_end, outfname, layername):
    """ Combines all the raster data into one xarray dataset, trimming to the date range, 
    and saves it as a multiband tif file.

    Input:
        infnames : list of input filenames
        date_start : str, start date of time series in format 'YYYY-MM-DD'
        date_end : str, end date of time series in format 'YYYY-MM-DD'
        outfname : str, path+name of output file
    """
    # Read all the raster data into one xarray dataset
    try:
        # Requires dask installed
        ds = xarray.open_mfdataset(infnames, combine="by_coords")
    except:
        # If dask is not installed, merge it this way (not as memory efficient)
        ds = xarray.merge([xarray.open_dataset(f) for f in infnames])
    # Clip to date range
    ds = ds.sel(time=slice(date_start, date_end))
    # Save file
    # if file extension is tif, save as 
    if outfname.endswith('.tif'):
        xarray2tif(ds, outfname, layername)
    if outfname.endswith('.nc'):
        ds.to_netcdf(outfname)
    # Close file
    ds.close()
def test_get_SILO_layers()

test script

Expand source code
def test_get_SILO_layers():
    """
    test script
    """
    layernames = ["daily_rain","max_temp"]
    date_start = '2019-01-01'
    date_end = '2020-02-01'
    outpath = "silo_test"
    bbox = (130, -44, 153.9, -11)
    # test first for tif output format
    format_out = "tif"
    fnames_out = get_SILO_layers(layernames, date_start, date_end, outpath, bbox, format_out)
    assert len(fnames_out) > 0
def test_get_SILO_raster()

test script

Expand source code
def test_get_SILO_raster():
    """
    test script
    """
    layername = "daily_rain"
    years = 2019
    outpath = "silo_test"
    bbox = (130, -44, 153.9, -11)
    # test first for tif output format
    format_out = "tif"
    fnames_out = get_SILO_raster(layername, years, outpath, bbox, format_out)
    assert len(fnames_out) > 0
def xarray2tif(ds, outfname, layername)

Convert rio xarray dataset to multi-band geotiff with each time as separate band.

TBD: optional: save separate tif each time slice

INPUT: ds : xarray dataset outfname : str path+name of output file (".tif")

OUTPUT: tif : str, name of multi-band geotiff

Expand source code
def xarray2tif(ds, outfname, layername):
    """
    Convert rio xarray dataset to multi-band geotiff with each time as separate band.

    TBD: optional: save separate tif each time slice

    INPUT:
    ds : xarray dataset
    outfname : str
        path+name of output file (".tif")

    OUTPUT:
    tif : str, name of multi-band geotiff
    """

    # create empty array
    dsnew = xarray.Dataset()

    for i in range(len(ds.time)):

        # We will the date of the image to name the GeoTIFF
        date = ds.isel(time=i).time.dt.strftime("%Y-%m-%d").data
        # print(f'Writing {date}')

        da = ds.isel(time=i)
        attr = ds.attrs
        dsnew[str(date)] = xarray.DataArray(
            da.variables[layername][:],
            dims=["lat", "lon"],
            coords={"lat": da.variables["lat"], "lon": da.variables["lon"]},
            name = str(date)     
        )
        dsnew[str(date)].attrs = {'long_name': str(date)}

        # Write GeoTIFF with datacube libary for cloud optimized GeoTIFFs (COG)
        # write_cog(geo_im=singletimestamp_da,
        #         fname=os.path.join(outpath,f'{outfname_base}_{date}.tif',
        #         overwrite=True)

    
    # Set crs
    dsnew.rio.write_crs(4326, inplace=True)

    # Write GeoTIFF to disk
    dsnew.rio.to_raster(outfname)