!pip install -qq harmony-py python-cmr wget
Examining TEMPO data retrieved from NASA Earthdata
Overview
Table of Contents
- Setup
- Search for data granules
- Examining and downloading file results
- Reading and inspecting the data
- Working with the data to subset and plot
- Using Harmony-py to pre-process and retrieve data
- Plotting the data
Dataset Information
This notebook uses data from the Tropospheric Emissions: Monitoring of Pollution (TEMPO) instrument.
1. Setup
# Load packages into current runtime
import earthaccess # needed to discover and download TEMPO data
import netCDF4 as nc # needed to read TEMPO data
import os
import sys
import platform
from subprocess import Popen
import shutil
import numpy as np
#import numpy.ma as ma
import matplotlib.pyplot as plt # needed to plot the resulting time series
from matplotlib.pyplot import cm
import matplotlib.ticker as mticker
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import datetime as dt
import getpass
import traceback
from typing import Dict
import xarray as xr
import cmr
from harmony import BBox, Client, Collection, Request
from harmony.config import Environment
Logging in and creating local directory
# User needs to create an account at https://www.earthdata.nasa.gov/
# Function earthaccess.login prompts for EarthData login and password.
= earthaccess.login(strategy="interactive", persist=True)
auth
# Creating local directory
= os.path.expanduser("~") + os.sep
homeDir
with open(homeDir + '.dodsrc', 'w') as file:
file.write('HTTP.COOKIEJAR={}.urs_cookies\n'.format(homeDir))
file.write('HTTP.NETRC={}.netrc'.format(homeDir))
file.close()
print('Saved .dodsrc to:', homeDir)
# Set appropriate permissions for Linux/macOS
if platform.system() != "Windows":
'chmod og-rw ~/.netrc', shell=True)
Popen(else:
# Copy dodsrc to working directory in Windows
+ '.dodsrc', os.getcwd())
shutil.copy2(homeDir print('Copied .dodsrc to:', os.getcwd())
Saved .dodsrc to: /home/jovyan/
2. Search for TEMPO granules
We are going to search for data about nitrogen dioxide (\(NO_2\)) concentrations from TEMPO.
! rm *.nc*
= 'TEMPO_NO2_L3' # collection name to search for in the EarthData
short_name = 'V03'
version
= earthaccess.login()
auth
auth.refresh_tokens()
# Point of interest: NASA Langley Research Center, HamptonVA, USA
# latitude 37.1036 deg, longitude -76.3868 deg
#POI_lat = 37.1036
#POI_lon = -76.3868
# generic location, somewhere in the middle of the USA
= 38.
POI_lat = -96.
POI_lon = '2024-09-01 00:00:00'
date_start = '2024-09-01 23:59:59'
date_end
= earthaccess.search_data(short_name = short_name\
POI_results = version\
, version = (date_start, date_end)\
, temporal = (POI_lon, POI_lat)) # search by point of interest
, point
= 5. # deg
dlat = 6. # deg
dlon = earthaccess.search_data(short_name = short_name\
bbox_results = version\
, version = (date_start, date_end)\
, temporal = (POI_lon - dlon, POI_lat - dlat, POI_lon + dlon, POI_lat + dlat)) # search by bounding box , bounding_box
Granules found: 18
Granules found: 18
3. Examining and downloading file results
What does a result look like?
0] POI_results[
Let’s get the link to a granule
print(POI_results[-1].data_links()[0])
https://data.asdc.earthdata.nasa.gov/asdc-prod-protected/TEMPO/TEMPO_NO2_L3_V03/2024.09.01/TEMPO_NO2_L3_V03_20240901T235825Z_S016.nc
Let’s examine the file names present in the results object.
for r in POI_results:
= r.data_links()[0].split('/')[-1]
granule_name print(granule_name)
TEMPO_NO2_L3_V03_20240831T235844Z_S016.nc
TEMPO_NO2_L3_V03_20240901T003849Z_S017.nc
TEMPO_NO2_L3_V03_20240901T105755Z_S001.nc
TEMPO_NO2_L3_V03_20240901T113800Z_S002.nc
TEMPO_NO2_L3_V03_20240901T121805Z_S003.nc
TEMPO_NO2_L3_V03_20240901T125810Z_S004.nc
TEMPO_NO2_L3_V03_20240901T133815Z_S005.nc
TEMPO_NO2_L3_V03_20240901T143815Z_S006.nc
TEMPO_NO2_L3_V03_20240901T153815Z_S007.nc
TEMPO_NO2_L3_V03_20240901T163815Z_S008.nc
TEMPO_NO2_L3_V03_20240901T173815Z_S009.nc
TEMPO_NO2_L3_V03_20240901T183815Z_S010.nc
TEMPO_NO2_L3_V03_20240901T193815Z_S011.nc
TEMPO_NO2_L3_V03_20240901T203815Z_S012.nc
TEMPO_NO2_L3_V03_20240901T213815Z_S013.nc
TEMPO_NO2_L3_V03_20240901T223815Z_S014.nc
TEMPO_NO2_L3_V03_20240901T231820Z_S015.nc
TEMPO_NO2_L3_V03_20240901T235825Z_S016.nc
Downloading granulues
= earthaccess.download(POI_results[8:12], local_path=".") files
Getting 4 granules, approx download size: 3.35 GB
Accessing cloud dataset using dataset endpoint credentials: https://data.asdc.earthdata.nasa.gov/s3credentials
Downloaded: TEMPO_NO2_L3_V03_20240901T153815Z_S007.nc
Downloaded: TEMPO_NO2_L3_V03_20240901T163815Z_S008.nc
Downloaded: TEMPO_NO2_L3_V03_20240901T173815Z_S009.nc
Downloaded: TEMPO_NO2_L3_V03_20240901T183815Z_S010.nc
4. Reading and inspecting the data
We first define a function for reading data from TEMPO_NO2_L3_V03 netCDF file.
def read_TEMPO_NO2_L3(fn):
= nc.Dataset(fn) # open access to file
ds
= ds.groups['product'] # this opens group product, /product, as prod
prod
= prod.variables['vertical_column_stratosphere'] # this reads variable vertical_column_stratosphere from prod (group product, /product)
var = np.array(var) # create a numpy array
strat_NO2_column = var.getncattr('_FillValue') # read fill value attribute
fv_strat_NO2
= prod.variables['vertical_column_troposphere'] # this reads variable 'vertical_column_troposphere' from prod (group product, /product)
var = np.array(var)
trop_NO2_column = var.getncattr('_FillValue')
fv_trop_NO2 = var.getncattr('units') # read unit attribute for variable vertical_column_troposphere
NO2_unit
= prod.variables['main_data_quality_flag'] # this reads variable 'main_data_quality_flag' from prod (group product, /product)
var = np.array(var)
QF
= np.array(ds.variables['latitude']) # this reads variable latitude from root (/) into a numpy array
lat = np.array(ds.variables['longitude']) # this reads variable longitude from root (/) into a numpy array
lon
# close access to file
ds.close()
return lat, lon, strat_NO2_column, fv_strat_NO2, trop_NO2_column, fv_trop_NO2, NO2_unit, QF
Let’s now examine the data in a granulue
= POI_results[8].data_links()[0].split('/')[-1]
granule_name print(granule_name)
=\
lat, lon, strat_NO2_column, fv_strat_NO2, trop_NO2_column, fv_trop_NO2, NO2_unit, QF read_TEMPO_NO2_L3(granule_name)
TEMPO_NO2_L3_V03_20240901T153815Z_S007.nc
print('unit of NO2 column is ',NO2_unit) # unit of NO2 column
unit of NO2 column is molecules/cm^2
# lat is a 1D array:
lat.shape
(2950,)
# lat is a 1D array:
lon.shape
(7750,)
# stratospheric NO2 column is a 3D array
# with second dimension being the number of latitudes and third being the number of longitudes:
strat_NO2_column.shape
(1, 2950, 7750)
# and so is tropospheric NO2 column:
trop_NO2_column.shape
(1, 2950, 7750)
# fill value for the array of stratospheric NO2 column fv_strat_NO2
-1e+30
How many valid (non-fill) values are there?
len(strat_NO2_column[strat_NO2_column != fv_strat_NO2])
8078862
len(trop_NO2_column[trop_NO2_column != fv_trop_NO2])
8077832
How many pixels with high quality flag, 0, are there?
len(QF[QF == 0])
7363629
the numbers are different
So, we need to create arrays of equal lenght to add them up in order to get total column
= (QF == 0)\
good_data_mask & (trop_NO2_column != fv_trop_NO2)\
& (strat_NO2_column != fv_strat_NO2)
print(good_data_mask.shape)
(1, 2950, 7750)
How many good pixels are there?
= trop_NO2_column[good_data_mask]
good_trop_NO2_column len(good_trop_NO2_column)
7363186
unfortunate reality - “good” pixels may contain negative column
min(good_trop_NO2_column)
-5.4297665877187496e+16
getting physically meaningful pixels
= good_data_mask & (trop_NO2_column > 0.) & (strat_NO2_column > 0.) best_data_mask
= trop_NO2_column[best_data_mask]
best_trop_NO2_column len(best_trop_NO2_column)
6262529
5. Working with the data to subset and plot
working with data: spatial subsetting and masking out bad data
# define region of interest
= 5 # deg
dlat = 6 # deg
dlon = POI_lat - dlat
min_lat = POI_lat + dlat
max_lat = POI_lon - dlon
min_lon = POI_lon + dlon
max_lon
# subsetting NO2 column array
= (lat > min_lat) & (lat < max_lat)
mask_lat = (lon > min_lon) & (lon < max_lon)
mask_lon = lat[mask_lat]
lat_loc = lon[mask_lon]
lon_loc = trop_NO2_column[0,mask_lat,:][:, mask_lon]
trop_NO2_column_loc = strat_NO2_column[0,mask_lat,:][:, mask_lon]
strat_NO2_column_loc = QF[0,mask_lat,:][:, mask_lon]
QF_loc
= (QF_loc == 0) & (trop_NO2_column_loc > 0.) & (strat_NO2_column_loc > 0.)
best_data_mask_loc
# creating 2D arrays of latitudes and longitudes
= trop_NO2_column_loc.shape
(nlat, nlon) = np.empty((nlat, nlon), dtype = float)
lat_loc_2D = np.empty((nlat, nlon), dtype = float)
lon_loc_2D
for ilat in range(nlat): lon_loc_2D[ilat, :] = lon_loc
for ilon in range(nlon): lat_loc_2D[:, ilon] = lat_loc
plotting spatial distribution
# Create a figure
= plt.figure(figsize=(5, 6), dpi=300, facecolor = None)
fig
# Create a Cartopy projection
= ccrs.PlateCarree()
proj =ccrs.PlateCarree()
transform
# Adding subplot
= fig.add_subplot(111, projection=proj)
ax # Add coastlines
=0.5)
ax.coastlines(linewidth# Add U.S. state borders
=':', edgecolor='gray', linewidth=0.5)
ax.add_feature(cfeature.STATES, linestyle
= ax.scatter(lon_loc_2D[best_data_mask_loc], lat_loc_2D[best_data_mask_loc], s=1\
im = trop_NO2_column_loc[best_data_mask_loc] + strat_NO2_column_loc[best_data_mask_loc]\
, c = 0, vmax = 5.e+16, transform=transform)
, vmin -103, -89, 32, 44], crs=transform)
ax.set_extent([
= plt.colorbar(im, ticks=[0, 1.e+16, 2.e+16, 3.e+16, 4.e+16, 5.e+16], fraction=0.022, pad=0.01)
cb 'total NO2 col, '+NO2_unit, fontsize=10)
cb.set_label(
plt.show()
6. Another way to get the data - HarmonyPy
print('Please provide your Earthdata Login credentials to allow data access')
print('Your credentials will only be passed to Earthdata and will not be exposed in the notebook')
= input('Username:')
username
= Client(env=Environment.PROD, auth=(username, getpass.getpass())) harmony_client
Please provide your Earthdata Login credentials to allow data access
Your credentials will only be passed to Earthdata and will not be exposed in the notebook
Username: alexrad71
········
collection_ID is needed for this method
How do we get one? Search for collection short name, TEMPO_NO2_L3, in EarthData https://search.earthdata.nasa.gov/search and select right version, V03: find collection_ID in the address line:
getting specific datasets from a granule
= POI_results[8].data_links()[0].split('/')[-1]
granule_name
# TEMPO Formaldehyde L3 C2930763263-LARC_CLOUD
= Request(collection=Collection(id='C2930763263-LARC_CLOUD')\
request =[granule_name]\
, granule_name=['product/vertical_column_troposphere'\
, variables'product/vertical_column_stratosphere'\
, 'product/main_data_quality_flag'\
, 'latitude', 'longitude'])
,
= harmony_client.submit(request)
job_id print(f'jobID = {job_id}')
=True)
harmony_client.wait_for_processing(job_id, show_progress
# Download the resulting files
= harmony_client.download_all(job_id, directory='.', overwrite=True)
results = [f.result() for f in results]
all_results_stored print(f"Number of result files: {len(all_results_stored)}")
jobID = 3f2735bb-8d4d-4b30-ba3a-e9c29bc54ae0
[ Processing: 100% ] |###################################################| [|]
./TEMPO_NO2_L3_V03_20240901T153815Z_S007_subsetted.nc4
Number of result files: 1
./TEMPO_NO2_L3_V03_20240901T121805Z_S003_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T113800Z_S002_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T105755Z_S001_subsetted.nc4
./TEMPO_NO2_L3_V03_20240831T235844Z_S016_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T163815Z_S008_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T143815Z_S006_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T125810Z_S004_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T133815Z_S005_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T203815Z_S012_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T183815Z_S010_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T231820Z_S015_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T213815Z_S013_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T003849Z_S017_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T223815Z_S014_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T193815Z_S011_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T173815Z_S009_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T235825Z_S016_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T153815Z_S007_subsetted.nc4
make sure we got the same numbers - read and test the subset
= 'TEMPO_NO2_L3_V03_20240901T153815Z_S007_subsetted.nc4'
granule_name =\
lat, lon, strat_NO2_column, fv_strat_NO2, trop_NO2_column, fv_trop_NO2, NO2_unit, QF read_TEMPO_NO2_L3(granule_name)
How many pixels with valid column values and high quality flag?
there should be 7363186
= (QF == 0) & (trop_NO2_column != fv_trop_NO2) & (strat_NO2_column != fv_strat_NO2)
good_data_mask len(trop_NO2_column[good_data_mask])
7363186
copy subset file - it will be over-written at the next step
for r in all_results_stored:
print(r)
= r.rfind('.')
ind = r[:ind]+'_variable_subset' + r[ind:]
new_name print(new_name)
shutil.copy(r, new_name)
./TEMPO_NO2_L3_V03_20240901T153815Z_S007_subsetted.nc4
./TEMPO_NO2_L3_V03_20240901T153815Z_S007_subsetted_variable_subset.nc4
spatial AND variable subset
= Request(collection=Collection(id='C2930763263-LARC_CLOUD')\
request # Note there is not a granule specified!
=BBox(min_lon, min_lat, max_lon, max_lat)\
, spatial={'start': dt.datetime(2024, 9, 1, 0, 0, 0)\
, temporal'stop': dt.datetime(2024, 9, 1, 23, 59, 59)}
, =['product/vertical_column_troposphere'\
, variables'product/vertical_column_stratosphere'\
, 'product/main_data_quality_flag'\
, 'latitude', 'longitude'])
,
= harmony_client.submit(request)
job_id
print(f'jobID = {job_id}')
=True)
harmony_client.wait_for_processing(job_id, show_progress
# Download the resulting files
= harmony_client.download_all(job_id, directory='.', overwrite=True)
results = sorted([f.result() for f in results])
all_results_stored print(f'Number of result files: {len(all_results_stored)}')
jobID = 38de28c9-844b-41ec-93d0-a7a9d6a467f8
[ Processing: 100% ] |###################################################| [|]
Number of result files: 18
can the new subsetted granule be read by the our custom-made function read_TEMPO_NO2_L3(fn)?
= all_results_stored[8]
granule_name print(granule_name)
=\
lat, lon, strat_NO2_column, fv_strat_NO2, trop_NO2_column, fv_trop_NO2, NO2_unit, QF read_TEMPO_NO2_L3(granule_name)
./TEMPO_NO2_L3_V03_20240901T153815Z_S007_subsetted.nc4
Yes, it can. But we can use another method to access netCDF files
using xarray library
# Open the data files
= dict()
ds_dict for r in all_results_stored:
= xr.open_dataset(r)
ds_root = xr.open_dataset(r, group='product')
ds_product
= xr.merge([ds_root, ds_product]) ds_dict[r]
7. Plotting the data
plotting individual images
for i, (dk, dv) in enumerate(ds_dict.items()):
= dv['vertical_column_troposphere'].values
Var = dv['main_data_quality_flag'].values
QF = (Var > 0.) & (QF == 0)
best_trop_NO2_mask = Var[best_trop_NO2_mask]
best_trop_NO2
= Var.shape
(nt, nlat, nlon) = dv['longitude'].values
lon_1D = np.empty((nt, nlat, nlon))
lon_2D for j in range(nlat): lon_2D[0, j, :] = lon_1D
= dv['latitude'].values
lat_1D = np.empty((nt, nlat, nlon))
lat_2D for j in range(nlon):lat_2D[0, :, j] = lat_1D
print(i)
# Create a figure
= plt.figure(figsize=(5, 6), dpi=120, facecolor = None)
fig # Create a Cartopy projection
= ccrs.PlateCarree()
proj =ccrs.PlateCarree()
transform# Adding subplot
= fig.add_subplot(111, projection=proj)
ax # Add coastlines
=0.5)
ax.coastlines(linewidth# Add U.S. state borders
=':', edgecolor='gray', linewidth=0.5)
ax.add_feature(cfeature.STATES, linestyle# Set plot title, in this case - the name of subset granule
'/')[-1], fontsize=9)
ax.set_title(dk.split(# Set map extent
-103, -89, 32, 44], crs=transform)
ax.set_extent([# Set gridlines
= ax.gridlines(draw_labels=True, dms=True, color='gray', linestyle='--', linewidth=0.5)
gl = LONGITUDE_FORMATTER
gl.xformatter = LATITUDE_FORMATTER
gl.yformatter = False # no labels on the top margin
gl.top_labels = False # no labels on the right margin
gl.right_labels = True
gl.xlines = True
gl.ylines = mticker.FixedLocator([-100, -96, -92]) # longitude ticks
gl.xlocator = mticker.FixedLocator([34, 38, 42]) # latitude ticks
gl.ylocator
if len(best_trop_NO2) == 0:
= ax.scatter([POI_lon], [POI_lat], s=0\
im = [0], vmin = 0, vmax = 1.5e+16, transform=transform)
, c else:
= ax.scatter(lon_2D[best_trop_NO2_mask], lat_2D[best_trop_NO2_mask], s=1\
im = best_trop_NO2, vmin = 0, vmax = 1.5e+16, transform=transform)
, c
= plt.colorbar(im, ticks=[0, .5e+16, 1.e+16, 1.5e+16], fraction=0.022, pad=0.1)
cb 'trop NO2 col, '+NO2_unit, fontsize=10)
cb.set_label(
plt.show()
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
plotting multi-panel image
= len(ds_dict)
ngr # Create a figure
= plt.figure(figsize=(6, 5*ngr), dpi=120, facecolor = None)
fig # Create a Cartopy projection
= ccrs.PlateCarree()
proj =ccrs.PlateCarree()
transform
for i, (dk, dv) in enumerate(ds_dict.items()):
= dv['vertical_column_troposphere'].values
Var = dv['main_data_quality_flag'].values
QF = (Var > 0.) & (QF == 0)
best_trop_NO2_mask = Var[best_trop_NO2_mask]
best_trop_NO2
= Var.shape
(nt, nlat, nlon) = dv['longitude'].values
lon_1D = np.empty((nt, nlat, nlon))
lon_2D for j in range(nlat): lon_2D[0, j, :] = lon_1D
= dv['latitude'].values
lat_1D = np.empty((nt, nlat, nlon))
lat_2D for j in range(nlon):lat_2D[0, :, j] = lat_1D
print(i)
# Adding subplot
= fig.add_subplot(ngr, 1, i+1, projection=proj)
ax # Add coastlines
=0.5)
ax.coastlines(linewidth# Add U.S. state borders
=':', edgecolor='gray', linewidth=0.5)
ax.add_feature(cfeature.STATES, linestyle# Set plot title, in this case - the name of subset granule
'/')[-1], fontsize=9)
ax.set_title(dk.split(# Set map extent
-103, -89, 32, 44], crs=transform)
ax.set_extent([# Set gridlines
= ax.gridlines(draw_labels=True, dms=True, color='gray', linestyle='--', linewidth=0.5)
gl = LONGITUDE_FORMATTER
gl.xformatter = LATITUDE_FORMATTER
gl.yformatter = False # no labels on the top margin
gl.top_labels = False # no labels on the right margin
gl.right_labels = True
gl.xlines = True
gl.ylines = mticker.FixedLocator([-100, -96, -92]) # longitude ticks
gl.xlocator = mticker.FixedLocator([34, 38, 42]) # latitude ticks
gl.ylocator
if len(best_trop_NO2) == 0: # if there are no data points, ...
= ax.scatter([POI_lon], [POI_lat], s=0\
im = [0], vmin = 0, vmax = 1.5e+16, transform=transform) # ... plot an empty image
, c else:
= ax.scatter(lon_2D[best_trop_NO2_mask], lat_2D[best_trop_NO2_mask], s=1\
im = best_trop_NO2, vmin = 0, vmax = 1.5e+16, transform=transform)
, c
= plt.colorbar(im, ticks=[0, .5e+16, 1.e+16, 1.5e+16], fraction=0.022, pad=0.1)
cb 'trop NO2 col, '+NO2_unit, fontsize=10)
cb.set_label(
plt.show()
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17