Now import the libraries.
If you get a ModuleNotFoundError, try restarting the kernel.
# Import libraries
import pyproj
import xarray as xr
import pyrsig
import pandas as pd
import pycno
import matplotlib.pyplot as plt
Step 2: Exploring Data
Import libraries
Prepare a pyrsig object
# Choosing a Northeast domain for 2023 December 18th
# Dec 18th is in the public data.
locname = "nyc"
bbox = (-74.8, 40.32, -71.43, 41.4)
bdate = "2023-12-18"
api = pyrsig.RsigApi(bdate=bdate, bbox=bbox, workdir=locname, gridfit=True)
# import getpass
# api_key = getpass.getpass('Enter TEMPO key (anonymous if unknown):')
api_key = "anonymous"  # using public data, so anonymous works
api.tempo_kw["api_key"] = api_key
# After the cell runs, click on the table button.
# Then use filters to find TEMPO data products by names that start with "tempo".
descdf = api.descriptions()
descdf
# descdf.query('name.str.contains("tempo")')
# Choose the TEMPO NO2 troposphere product found above
# (this key matches the cached file name shown later in this notebook)
tempokey = "tempo.l2.no2.vertical_column_troposphere"
# By default, pyrsig uses the 'ascii' backend, but 'xdr' is faster;
# both look the same in Python, but the files are very different.
# I'm using xdr here for speed.
df = api.to_dataframe(tempokey, backend="xdr")
df
       Timestamp(UTC)            LONGITUDE(deg)  LATITUDE(deg)  no2_vertical_column_troposphere(molecules/cm2)  Longitude_SW(deg)  Longitude_SE(deg)  Longitude_NW(deg)  Longitude_NE(deg)  Latitude_SW(deg)  Latitude_SE(deg)  Latitude_NW(deg)  Latitude_NE(deg)
0      2023-12-18T15:11:00+0000  -73.316078      41.398060      3.093594e+15                                    -73.281212         -73.344002         -73.288191         -73.351015         41.410378         41.407287         41.388793         41.385610
1      2023-12-18T15:11:00+0000  -72.758522      41.398186      7.423907e+13                                    -72.723591         -72.786112         -72.730680         -72.793116         41.410442         41.408224         41.389041         41.386914
2      2023-12-18T15:11:00+0000  -72.820793      41.396824      4.233557e+14                                    -72.786112         -72.848591         -72.793116         -72.855520         41.408224         41.406334         41.386914         41.385095
3      2023-12-18T15:11:00+0000  -72.883331      41.394588      1.407843e+15                                    -72.848591         -72.911537         -72.855520         -72.918381         41.406334         41.403631         41.385095         41.382483
4      2023-12-18T15:11:00+0000  -72.946648      41.391434      3.048500e+15                                    -72.911537         -72.974806         -72.918381         -72.981564         41.403631         41.400612         41.382483         41.379558
...    ...                       ...             ...            ...                                             ...                ...                ...                ...                ...               ...               ...               ...
890    2023-12-18T18:18:00+0000  -73.764481      40.330441      5.616886e+15                                    -73.730804         -73.792147         -73.737051         -73.798367         40.341922         40.339574         40.321104         40.318759
891    2023-12-18T18:18:00+0000  -73.826035      40.327888      4.467097e+15                                    -73.792147         -73.853292         -73.798367         -73.859488         40.339574         40.337360         40.318759         40.316548
892    2023-12-18T18:18:00+0000  -73.886749      40.326015      8.049984e+15                                    -73.853292         -73.913992         -73.859488         -73.920162         40.337360         40.335357         40.316548         40.314547
893    2023-12-18T18:18:00+0000  -73.947411      40.323883      1.004033e+16                                    -73.913992         -73.975153         -73.920162         -73.981304         40.335357         40.333200         40.314547         40.312383
894    2023-12-18T18:18:00+0000  -74.009056      40.321693      8.188616e+15                                    -73.975153         -74.036465         -73.981304         -74.042583         40.333200         40.330881         40.312383         40.310077

9569 rows × 12 columns
# Do it again, but clean up the keys and add a time object.
# Notice that the file is reused.
df = api.to_dataframe(tempokey, unit_keys=False, parse_dates=True, backend="xdr")
df
Using cached: nyc/tempo.l2.no2.vertical_column_troposphere_2023-12-18T000000Z_2023-12-18T235959Z.xdr.gz
       Timestamp                 LONGITUDE   LATITUDE   no2_vertical_column_troposphere  Longitude_SW  Longitude_SE  Longitude_NW  Longitude_NE  Latitude_SW  Latitude_SE  Latitude_NW  Latitude_NE  time
0      2023-12-18T15:11:00+0000  -73.316078  41.398060  3.093594e+15                     -73.281212    -73.344002    -73.288191    -73.351015    41.410378    41.407287    41.388793    41.385610    2023-12-18 15:11:00+00:00
1      2023-12-18T15:11:00+0000  -72.758522  41.398186  7.423907e+13                     -72.723591    -72.786112    -72.730680    -72.793116    41.410442    41.408224    41.389041    41.386914    2023-12-18 15:11:00+00:00
2      2023-12-18T15:11:00+0000  -72.820793  41.396824  4.233557e+14                     -72.786112    -72.848591    -72.793116    -72.855520    41.408224    41.406334    41.386914    41.385095    2023-12-18 15:11:00+00:00
3      2023-12-18T15:11:00+0000  -72.883331  41.394588  1.407843e+15                     -72.848591    -72.911537    -72.855520    -72.918381    41.406334    41.403631    41.385095    41.382483    2023-12-18 15:11:00+00:00
4      2023-12-18T15:11:00+0000  -72.946648  41.391434  3.048500e+15                     -72.911537    -72.974806    -72.918381    -72.981564    41.403631    41.400612    41.382483    41.379558    2023-12-18 15:11:00+00:00
...    ...                       ...         ...        ...                              ...           ...           ...           ...           ...          ...          ...          ...          ...
890    2023-12-18T18:18:00+0000  -73.764481  40.330441  5.616886e+15                     -73.730804    -73.792147    -73.737051    -73.798367    40.341922    40.339574    40.321104    40.318759    2023-12-18 18:18:00+00:00
891    2023-12-18T18:18:00+0000  -73.826035  40.327888  4.467097e+15                     -73.792147    -73.853292    -73.798367    -73.859488    40.339574    40.337360    40.318759    40.316548    2023-12-18 18:18:00+00:00
892    2023-12-18T18:18:00+0000  -73.886749  40.326015  8.049984e+15                     -73.853292    -73.913992    -73.859488    -73.920162    40.337360    40.335357    40.316548    40.314547    2023-12-18 18:18:00+00:00
893    2023-12-18T18:18:00+0000  -73.947411  40.323883  1.004033e+16                     -73.913992    -73.975153    -73.920162    -73.981304    40.335357    40.333200    40.314547    40.312383    2023-12-18 18:18:00+00:00
894    2023-12-18T18:18:00+0000  -74.009056  40.321693  8.188616e+15                     -73.975153    -74.036465    -73.981304    -74.042583    40.333200    40.330881    40.312383    40.310077    2023-12-18 18:18:00+00:00

9569 rows × 13 columns
Step 3: Compare to observations
Make an hourly average product.
Make a simple time-series plot
Do the same with AirNow to compare (a sketch follows the time-series plot below)
# Make an hourly average
hdf = df.groupby(pd.Grouper(key="time", freq="1h")).mean(numeric_only=True)
hdf
                           LONGITUDE   LATITUDE   no2_vertical_column_troposphere  Longitude_SW  Longitude_SE  Longitude_NW  Longitude_NE  Latitude_SW  Latitude_SE  Latitude_NW  Latitude_NE
time
2023-12-18 15:00:00+00:00  -73.283330  40.849232  4.230446e+15                     -73.249018    -73.310997    -73.255643    -73.317594    40.860911    40.858619    40.839853    40.837564
2023-12-18 16:00:00+00:00  -73.187944  40.852178  5.531938e+15                     -73.153566    -73.215645    -73.160231    -73.222283    40.863865    40.861568    40.842798    40.840506
2023-12-18 17:00:00+00:00  -73.093278  40.857026  6.386307e+15                     -73.058850    -73.120997    -73.065552    -73.127672    40.868752    40.866385    40.847680    40.845316
2023-12-18 18:00:00+00:00  -73.111625  40.827774  5.733243e+15                     -73.077252    -73.139305    -73.083933    -73.145958    40.839493    40.837118    40.818434    40.816065
# Plot a data column selected from the names above
tempocol = "no2_vertical_column_troposphere"
ax = hdf[tempocol].plot(ylim=(0, None), ylabel="NO2 [molec/cm2]")
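For the AirNow comparison, here is a minimal sketch using the same api object and hourly-averaging pattern as above. The RSIG coverage name "airnow.no2" and its resulting column name "no2" are assumptions; if either differs, look them up with api.descriptions().

# Sketch (assumed key/column names): get AirNow surface NO2 for the same date and bbox
adf = api.to_dataframe("airnow.no2", unit_keys=False, parse_dates=True)
# Average to hours, like the TEMPO product above
ahdf = adf.groupby(pd.Grouper(key="time", freq="1h")).mean(numeric_only=True)
# Note the different units: surface NO2 is in ppb, not molecules/cm2
ax = ahdf["no2"].plot(ylim=(0, None), ylabel="AirNow NO2 [ppb]")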
Step 4: Make a gridded product

# Regrid TEMPO to a custom Level 3 file (assumed step; gridfit=True above enables gridding)
ds = api.to_ioapi(tempokey)
# Choose a column from above; notice that names are truncated, so they can be weird
tempoikey = "NO2_VERTICAL_CO"
# Now plot a map
cno = pycno.cno(ds.crs_proj4)
qm = ds[tempoikey].where(lambda x: x > 0).mean(("TSTEP", "LAY")).plot()
cno.drawstates(resnum=1)
/usr/local/lib/python3.11/dist-packages/pycno/__init__.py:538: UserWarning: Downloading: https://www.giss.nasa.gov/tools/panoply/overlays/MWDB_Coasts_NA_1.cnob to /root/.pycno/MWDB_Coasts_NA_1.cnob
warnings.warn('Downloading: ' + url + ' to ' + str(datapatho))
Step 5: Make a surface NO2 map
Download CMAQ EQUATES surface and columns
Extract CMAQ to gridded TEMPO
Calculate a transfer function [1]
Estimate surface NO2
Average and make a plot
[1] No warranty expressed or implied!
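The transfer function is just the model's surface-to-column ratio, applied grid cell by grid cell to the observed column. Stated with the variable names created in the cells below:

CMAQ_SFC2COL = CMAQ_NO2_SFC / CMAQ_NO2_COL   [ppb per molec/cm2]
TEMPO_SFC    = NO2_VERTICAL_CO * CMAQ_SFC2COL   [ppb]

The assumption is that the model's surface-to-column relationship carries over to the observations, even where the modeled column magnitude is off (hence the footnote).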
# Get a column and surface estimate from CMAQ
cmaqcolkey = "cmaq.equates.conus.integrated.NO2_COLUMN"
qids = api.to_ioapi(cmaqcolkey, bdate="2018-12-21")
cmaqsfckey = "cmaq.equates.conus.aconc.NO2"
qsds = api.to_ioapi(cmaqsfckey, bdate="2018-12-21")
Align Grids
To align the grids, we have to convert between Lambert projections. This is a little complicated, but pyrsig gives you all the tools you need.
get 2d x/y for TEMPO L3 cell centroids
get 2d x/y for TEMPO L3 cell centroids on CMAQ grid
store for later use
pretend the EQUATES data is 2023
# 1. get 2d x/y for TEMPO L3 cell centroids
y, x = xr.broadcast(ds.ROW, ds.COL)
# 2. get 2d x/y for TEMPO L3 cell centroids on the CMAQ grid
dstproj = pyproj.Proj(ds.crs_proj4)
srcproj = pyproj.Proj(qids.crs_proj4)
X, Y = srcproj(*dstproj(x.values, y.values, inverse=True))
# 3. store the result for later use
ds["CMAQX"] = ("COL",), X.mean(0)
ds["CMAQY"] = ("ROW",), Y.mean(1)
# 4. here we pretend that the CMAQ times align with the TEMPO times
ds["CMAQT"] = ("TSTEP",), qsds.TSTEP.values
Extract CMAQ to TEMPO custom L3
We’ll extract data using the CMAQ coordinates
And add the results to the TEMPO dataset
# Now we extract the CMAQ at the TEMPO locations
# all extractions will output time, y, x data
dims = ("TSTEP", "ROW", "COL")
# all extractions use the same coordinates
selopts = dict(TSTEP=ds["CMAQT"], COL=ds["CMAQX"], ROW=ds["CMAQY"], method="nearest")
# LAY = 1 is the surface (pressure ~1 atm)
selopts["LAY"] = 1
# Get CMAQ surface NO2 (NO2) and tropospheric column (NO2_COLUMN)
ds["CMAQ_NO2_SFC"] = dims, qsds["NO2"].sel(**selopts).data, {"units": "ppb"}
ds["CMAQ_NO2_COL"] = dims, qids["NO2_COLUMN"].sel(**selopts).data * 1e15, {"units": "molec/cm**2"}
# Calculate the transfer function
ds["CMAQ_SFC2COL"] = ds["CMAQ_NO2_SFC"] / ds["CMAQ_NO2_COL"]
ds["CMAQ_SFC2COL"].attrs.update(units="1")
# Calculate the estimated surface NO2
ds["TEMPO_SFC"] = ds["NO2_VERTICAL_CO"] * ds["CMAQ_SFC2COL"]
ds["TEMPO_SFC"].attrs.update(units="ppb")
Now Plot TEMPO-based Surface NO2
# Now plot the time average
pds = ds.where(ds["NO2_VERTICAL_CO"] > 0).mean(("TSTEP", "LAY"), keep_attrs=True)
# Controlling figure canvas use
gskw = dict(left=0.05, right=0.95, bottom=0.05, top=0.95)
fig, axx = plt.subplots(2, 3, figsize=(18, 8), gridspec_kw=gskw)
# Put CMAQ on top row: columns 0, 1, and 2
qmsfc = pds["CMAQ_NO2_SFC"].plot(ax=axx[0, 0], cmap="viridis")
qmcol = pds["CMAQ_NO2_COL"].plot(ax=axx[0, 1], cmap="cividis")
pds["CMAQ_SFC2COL"].plot(ax=axx[0, 2], cmap="Reds")
# Put TEMPO on bottom row and use the same color scales as CMAQ
pds["TEMPO_SFC"].plot(ax=axx[1, 0], norm=qmsfc.norm, cmap=qmsfc.cmap)
pds["NO2_VERTICAL_CO"].plot(ax=axx[1, 1], norm=qmcol.norm, cmap=qmcol.cmap)
# add state overlays
cno.drawstates(ax=axx, resnum=1)
# hide the unused axes
axx[1, 2].set(visible=False)
# add a reminder
_ = fig.text(0.7, 0.25, "Don't look too close.\nRemember, CMAQ is from 2018 and TEMPO is from 2023")
Step 6: Adapt other tutorials
Go to https://barronh.github.io/pyrsig
Navigate to an example you like
Copy and paste chunks of code
Then, run them
Below, I did that for the Pittsburgh Pandora evaluation
import pyrsig

bbox = (-74.5, 40.0, -73.5, 41)
cmaqkey = "cmaq.equates.conus.integrated.NO2_COLUMN"
cmaqcol = "NO2_COLUMN"
datakey = "pandora.L2_rnvs3p1_8.nitrogen_dioxide_vertical_column_amount"
datacol = "nitrogen_dioxide_vertical_column_amount"
# Or use TropOMI or any other NO2 column data
# datakey = 'tropomi.offl.no2.nitrogendioxide_tropospheric_column'

# Get a CMAQ file from RSIG
api = pyrsig.RsigApi(bbox=bbox, bdate="2018-07-01T12", edate="2018-07-01T23:59:59")
ds = api.to_ioapi(cmaqkey)
# pair_rsigcmaq will match the CMAQ bbox, bdate, and edate
df = pyrsig.cmaq.pair_rsigcmaq(ds, cmaqcol, datakey)
pdf = df.groupby(["time"]).mean(numeric_only=True)
z1 = pdf[datacol]
z2 = pdf["CMAQ_" + cmaqcol] * 1e15
ax = z1.plot(marker="+", linestyle="none", label="Pandora")
ax = z2.plot(ax=ax, color="green", label="CMAQ")
ax.legend()
ax.set(ylim=(0, None), ylabel="NO2 [molecules/cm2]", xlabel="Time [UTC]")
ax.figure.savefig("cmaq_pandora.png")
Hopefully, we won’t use this…
I archived all the downloads and posted them. If a server is down, we'll just use the archived data.
# in case of emergency:
# !wget -N --no-check-certificate https://gaftp.epa.gov/Air/aqmg/bhenders/tutorial_data/pyrsig_tutorial_2024-05.zip
# !unzip pyrsig_tutorial_2024-05.zip
# !mkdir nyc; cd nyc; unzip ../pyrsig_tutorial_2024-05.zip
# import zipfile
# import glob
# paths = glob.glob('*.gz')
# with zipfile.ZipFile('pyrsig_tutorial_2024-05.zip', mode='w') as zf:
#     for p in paths:
#         zf.write(p)