Module python_scripts.utils
Some utility functions.
This package is part of the machine learning project developed for the Agricultural Research Federation (AgReFed).
Expand source code
"""
Some utility functions.
This package is part of the machine learning project developed for the Agricultural Research Federation (AgReFed).
"""
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
from rasterio.transform import Affine
from scipy import spatial
def project_data(infname, outfname, colname_lng_source, colname_lat_source, tocrs_epsg):
"""
Projects csv file into new coordinate reference system (crs) such as from Lat/Long to meter
The projected file with convertd coordinates are saved as csv.
Input
----
infname: inoput path + filename
infname: output path + filename
Result
------
CSV file saved to disk
"""
df_soil = pd.read_csv(infname)
#First convert WGS84 ( or epsg:4326) to meters, here we choose default web merkator projection (epgs:3857)
gdf = gpd.GeoDataFrame(df_soil.copy(), geometry=gpd.points_from_xy(df_soil[colname_lng_source], df_soil[colname_lat_source]))
#gdf.crs = {'init' :'epsg:4326'} # Depreciated with geopandas >= 0.7
gdf.crs = "EPSG:4326"
gdf = gdf.to_crs(tocrs_epsg)
gdf['Easting'] = gdf.geometry.x
gdf['Northing'] = gdf.geometry.y
# Save without geometry column as csv
gdf.drop('geometry', axis=1).to_csv(outfname)
def array2geotiff(array, xy0, xyres, outfname, epsg_crs):
"""
Converts numpy array into geopastial refernced tiff image (geotiff)
Input
-----
array: numpy 2-dim array with height = shape[0] and width = shape[1]
xy0: Coordinate origin [x0,y0] in given crs
xyres: resolution [xres, yres]
outfname: output path + filename.tif
crs: Coordinate reference system, e.g. 'EPSG:28355'
Return
------
Saves geotiff file
returns True/False
"""
crs = rasterio.crs.CRS.from_string(epsg_crs)
transform = Affine.translation(xy0[0] - xyres[0] / 2, xy0[1] - xyres[1] / 2) * Affine.scale(xyres[0], xyres[1])
# Write to file:
try:
with rasterio.open(
outfname,
'w',
driver='GTiff',
height=array.shape[0],
width=array.shape[1],
count=1,
dtype=array.dtype,
crs=crs,
transform=transform,
) as dst:
dst.write(array, 1)
return True
except:
print("ERROR: Writing array to Geotiff failed!")
return False
def align_nearest_neighbor(xy_orig, xy_ref, data_orig, max_dist):
"""
Find nearest neighbor in reference coordinates and
return matching data at locations and with shape of reference grid
Input
-----
xy_orig: original positions, 2D array with shape (n_orig,2)
xy_ref: reference positions, 2D array with shape (n_ref, 2)
data_orig: array with n_orig data values, or list with multiple arrays
max_dist: maximum distance for nearest neighbor,
reference array with distances larger than that are set to nan values
Return
------
array or list of arrays with values that match refernce grid
"""
assert xy_orig.shape[1] == xy_ref.shape[1] == 2
tree = spatial.cKDTree(xy_ref)
ind_nearest = tree.query(xy_orig)[1]
nearest_dist = np.sqrt((xy_orig[:,0] - xy_ref[ind_nearest][:,0])**2
+ (xy_orig[:,1] - xy_ref[ind_nearest][:,1])**2)
if isinstance(data_orig, list):
res = []
for data in data_orig:
ires = data[ind_nearest]
ires[nearest_dist > max_dist] = np.nan
res.append(ires)
else:
res = data_orig[ind_nearest]
res[nearest_dist > max_dist] = np.nan
return res
def print2(text, fname_out = os.path.join('loginfo.txt')):
"""
Prints text to standard output (typically terminal) and to file simultaneously
Note text: Advise to use f-string for complex text
Alternative: Use logger info
INPUT:
text: string (if 'init' new header will be written, and old contentwill be overwritten)
fname_out: path + file name (Default: {outpath}/info.txt)
"""
if text == 'init':
# Initialise a new file (e.g., if program is restarted)
if os.path.isfile(fname_out):
with open(fname_out, 'w') as f:
print('OUTPUT INFO', file = f)
print('-----------', file = f)
pass
else:
print(text)
if os.path.isfile(fname_out):
arg = 'a'
else:
arg = 'w'
with open(fname_out, arg) as f:
print(text, file = f)
def truncate_data(data, cutperc):
"""
Combines lits of arrays and truncates with lower and upper percentile
Input
-----
data: list of numpy arrays
cutperc: percent to cut off from percentile
Return
-------
truncated flattened data array
"""
datacomb = data[0]
for i in range(len(data)-1):
datacomb = np.concatenate((datacomb, data[i+1]))
low = np.percentile(datacomb, cutperc)
high = np.percentile(datacomb, 100-cutperc)
return datacomb[(datacomb >= low) & (datacomb <= high)]
def create_vtkcube(data, origin, voxelsize, fname):
"""
Export Cube as VTK file (can be used in e.g. ParaView)
and create a range of 3D cube plots with pyvista
INPUT
data: 3D cube in shape (xdim, ydim, zdim)
origin: origin cooridnates of cube
voxelsize: voxel sizes in (xsize, ysize, zsize)
fname: path + filename for files
"""
grid = pv.UniformGrid()
grid.dimensions = np.array(data.shape) + 1
grid.origin = origin
grid.spacing = voxelsize
grid.cell_arrays["values"] = data.flatten(order="F")
grid.save(fname)
Functions
def align_nearest_neighbor(xy_orig, xy_ref, data_orig, max_dist)-
Find nearest neighbor in reference coordinates and return matching data at locations and with shape of reference grid
Input
xy_orig: original positions, 2D array with shape (n_orig,2) xy_ref: reference positions, 2D array with shape (n_ref, 2) data_orig: array with n_orig data values, or list with multiple arrays max_dist: maximum distance for nearest neighbor, reference array with distances larger than that are set to nan valuesReturn
array or list of arrays with values that match refernce gridExpand source code
def align_nearest_neighbor(xy_orig, xy_ref, data_orig, max_dist): """ Find nearest neighbor in reference coordinates and return matching data at locations and with shape of reference grid Input ----- xy_orig: original positions, 2D array with shape (n_orig,2) xy_ref: reference positions, 2D array with shape (n_ref, 2) data_orig: array with n_orig data values, or list with multiple arrays max_dist: maximum distance for nearest neighbor, reference array with distances larger than that are set to nan values Return ------ array or list of arrays with values that match refernce grid """ assert xy_orig.shape[1] == xy_ref.shape[1] == 2 tree = spatial.cKDTree(xy_ref) ind_nearest = tree.query(xy_orig)[1] nearest_dist = np.sqrt((xy_orig[:,0] - xy_ref[ind_nearest][:,0])**2 + (xy_orig[:,1] - xy_ref[ind_nearest][:,1])**2) if isinstance(data_orig, list): res = [] for data in data_orig: ires = data[ind_nearest] ires[nearest_dist > max_dist] = np.nan res.append(ires) else: res = data_orig[ind_nearest] res[nearest_dist > max_dist] = np.nan return res def array2geotiff(array, xy0, xyres, outfname, epsg_crs)-
Converts numpy array into geopastial refernced tiff image (geotiff)
Input
array: numpy 2-dim array with height = shape[0] and width = shape[1] xy0: Coordinate origin [x0,y0] in given crs xyres: resolution [xres, yres] outfname: output path + filename.tif crs: Coordinate reference system, e.g. 'EPSG:28355'Return
Saves geotiff file returns True/FalseExpand source code
def array2geotiff(array, xy0, xyres, outfname, epsg_crs): """ Converts numpy array into geopastial refernced tiff image (geotiff) Input ----- array: numpy 2-dim array with height = shape[0] and width = shape[1] xy0: Coordinate origin [x0,y0] in given crs xyres: resolution [xres, yres] outfname: output path + filename.tif crs: Coordinate reference system, e.g. 'EPSG:28355' Return ------ Saves geotiff file returns True/False """ crs = rasterio.crs.CRS.from_string(epsg_crs) transform = Affine.translation(xy0[0] - xyres[0] / 2, xy0[1] - xyres[1] / 2) * Affine.scale(xyres[0], xyres[1]) # Write to file: try: with rasterio.open( outfname, 'w', driver='GTiff', height=array.shape[0], width=array.shape[1], count=1, dtype=array.dtype, crs=crs, transform=transform, ) as dst: dst.write(array, 1) return True except: print("ERROR: Writing array to Geotiff failed!") return False def create_vtkcube(data, origin, voxelsize, fname)-
Export Cube as VTK file (can be used in e.g. ParaView) and create a range of 3D cube plots with pyvista
INPUT data: 3D cube in shape (xdim, ydim, zdim) origin: origin cooridnates of cube voxelsize: voxel sizes in (xsize, ysize, zsize) fname: path + filename for files
Expand source code
def create_vtkcube(data, origin, voxelsize, fname): """ Export Cube as VTK file (can be used in e.g. ParaView) and create a range of 3D cube plots with pyvista INPUT data: 3D cube in shape (xdim, ydim, zdim) origin: origin cooridnates of cube voxelsize: voxel sizes in (xsize, ysize, zsize) fname: path + filename for files """ grid = pv.UniformGrid() grid.dimensions = np.array(data.shape) + 1 grid.origin = origin grid.spacing = voxelsize grid.cell_arrays["values"] = data.flatten(order="F") grid.save(fname) def print2(text, fname_out='loginfo.txt')-
Prints text to standard output (typically terminal) and to file simultaneously Note text: Advise to use f-string for complex text Alternative: Use logger info
Input
text: string (if 'init' new header will be written, and old contentwill be overwritten) fname_out: path + file name (Default: {outpath}/info.txt)
Expand source code
def print2(text, fname_out = os.path.join('loginfo.txt')): """ Prints text to standard output (typically terminal) and to file simultaneously Note text: Advise to use f-string for complex text Alternative: Use logger info INPUT: text: string (if 'init' new header will be written, and old contentwill be overwritten) fname_out: path + file name (Default: {outpath}/info.txt) """ if text == 'init': # Initialise a new file (e.g., if program is restarted) if os.path.isfile(fname_out): with open(fname_out, 'w') as f: print('OUTPUT INFO', file = f) print('-----------', file = f) pass else: print(text) if os.path.isfile(fname_out): arg = 'a' else: arg = 'w' with open(fname_out, arg) as f: print(text, file = f) def project_data(infname, outfname, colname_lng_source, colname_lat_source, tocrs_epsg)-
Projects csv file into new coordinate reference system (crs) such as from Lat/Long to meter The projected file with convertd coordinates are saved as csv.
Input
infname: inoput path + filename infname: output path + filenameResult
CSV file saved to disk
Expand source code
def project_data(infname, outfname, colname_lng_source, colname_lat_source, tocrs_epsg): """ Projects csv file into new coordinate reference system (crs) such as from Lat/Long to meter The projected file with convertd coordinates are saved as csv. Input ---- infname: inoput path + filename infname: output path + filename Result ------ CSV file saved to disk """ df_soil = pd.read_csv(infname) #First convert WGS84 ( or epsg:4326) to meters, here we choose default web merkator projection (epgs:3857) gdf = gpd.GeoDataFrame(df_soil.copy(), geometry=gpd.points_from_xy(df_soil[colname_lng_source], df_soil[colname_lat_source])) #gdf.crs = {'init' :'epsg:4326'} # Depreciated with geopandas >= 0.7 gdf.crs = "EPSG:4326" gdf = gdf.to_crs(tocrs_epsg) gdf['Easting'] = gdf.geometry.x gdf['Northing'] = gdf.geometry.y # Save without geometry column as csv gdf.drop('geometry', axis=1).to_csv(outfname) def truncate_data(data, cutperc)-
Combines lits of arrays and truncates with lower and upper percentile
Input
data: list of numpy arrays cutperc: percent to cut off from percentile Return
truncated flattened data arrayExpand source code
def truncate_data(data, cutperc): """ Combines lits of arrays and truncates with lower and upper percentile Input ----- data: list of numpy arrays cutperc: percent to cut off from percentile Return ------- truncated flattened data array """ datacomb = data[0] for i in range(len(data)-1): datacomb = np.concatenate((datacomb, data[i+1])) low = np.percentile(datacomb, cutperc) high = np.percentile(datacomb, 100-cutperc) return datacomb[(datacomb >= low) & (datacomb <= high)]