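This tutorial starts from a fresh environment, so install the dependencies first. A minimal install cell, with the package list assumed from the imports below:

```python
# Assumed package list: pyrsig and pycno are imported below;
# netcdf4 is typically needed to read pyrsig's IOAPI NetCDF files.
%pip install -q pyrsig pycno netcdf4
```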
Now import the libraries.
If you get a ModuleNotFoundError, try restarting the kernel.
```python
# Import libraries
import pyproj
import xarray as xr
import pyrsig
import pandas as pd
import pycno
import getpass
import matplotlib.pyplot as plt
```
Step 2: Exploring Data

- Import libraries
- Prepare a pyrsig object
```python
# Choosing a Northeast domain for 2023 December 18th
# Dec 18th is in the public data.
locname = 'nyc'
bbox = (-74.8, 40.32, -71.43, 41.4)
bdate = '2023-12-18'
```
```python
api = pyrsig.RsigApi(bdate=bdate, bbox=bbox, workdir=locname, gridfit=True)
# api_key = getpass.getpass('Enter TEMPO key (anonymous if unknown):')
api_key = 'anonymous'  # using public data, so anonymous works
api.tempo_kw['api_key'] = api_key
```
```python
# After the cell runs, click on the table button.
# Then use filters to find TEMPO data products by names that start with tempo
descdf = api.descriptions()
descdf
# descdf.query('name.str.contains("tempo")')
```
```python
# By default, pyrsig uses the 'ascii' backend, but 'xdr' is faster;
# both look the same in Python, but the files are very different.
# I'm using xdr here for speed.
# TEMPO product key (matches the cached file name shown later)
tempokey = 'tempo.l2.no2.vertical_column_troposphere'
df = api.to_dataframe(tempokey, backend='xdr')
df
```
| | Timestamp(UTC) | LONGITUDE(deg) | LATITUDE(deg) | no2_vertical_column_troposphere(molecules/cm2) | Longitude_SW(deg) | Longitude_SE(deg) | Longitude_NW(deg) | Longitude_NE(deg) | Latitude_SW(deg) | Latitude_SE(deg) | Latitude_NW(deg) | Latitude_NE(deg) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-12-18T15:11:00+0000 | -73.316078 | 41.398060 | 3.093594e+15 | -73.281212 | -73.344002 | -73.288191 | -73.351015 | 41.410378 | 41.407287 | 41.388793 | 41.385610 |
| 1 | 2023-12-18T15:11:00+0000 | -72.758522 | 41.398186 | 7.423907e+13 | -72.723591 | -72.786112 | -72.730680 | -72.793116 | 41.410442 | 41.408224 | 41.389041 | 41.386914 |
| 2 | 2023-12-18T15:11:00+0000 | -72.820793 | 41.396824 | 4.233557e+14 | -72.786112 | -72.848591 | -72.793116 | -72.855520 | 41.408224 | 41.406334 | 41.386914 | 41.385095 |
| 3 | 2023-12-18T15:11:00+0000 | -72.883331 | 41.394588 | 1.407843e+15 | -72.848591 | -72.911537 | -72.855520 | -72.918381 | 41.406334 | 41.403631 | 41.385095 | 41.382483 |
| 4 | 2023-12-18T15:11:00+0000 | -72.946648 | 41.391434 | 3.048500e+15 | -72.911537 | -72.974806 | -72.918381 | -72.981564 | 41.403631 | 41.400612 | 41.382483 | 41.379558 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 890 | 2023-12-18T18:18:00+0000 | -73.764481 | 40.330441 | 5.616886e+15 | -73.730804 | -73.792147 | -73.737051 | -73.798367 | 40.341922 | 40.339574 | 40.321104 | 40.318759 |
| 891 | 2023-12-18T18:18:00+0000 | -73.826035 | 40.327888 | 4.467097e+15 | -73.792147 | -73.853292 | -73.798367 | -73.859488 | 40.339574 | 40.337360 | 40.318759 | 40.316548 |
| 892 | 2023-12-18T18:18:00+0000 | -73.886749 | 40.326015 | 8.049984e+15 | -73.853292 | -73.913992 | -73.859488 | -73.920162 | 40.337360 | 40.335357 | 40.316548 | 40.314547 |
| 893 | 2023-12-18T18:18:00+0000 | -73.947411 | 40.323883 | 1.004033e+16 | -73.913992 | -73.975153 | -73.920162 | -73.981304 | 40.335357 | 40.333200 | 40.314547 | 40.312383 |
| 894 | 2023-12-18T18:18:00+0000 | -74.009056 | 40.321693 | 8.188616e+15 | -73.975153 | -74.036465 | -73.981304 | -74.042583 | 40.333200 | 40.330881 | 40.312383 | 40.310077 |

9569 rows × 12 columns
```python
# Do it again, but clean up the keys and add a time object
# Notice that the file is reused
df = api.to_dataframe(tempokey, unit_keys=False, parse_dates=True, backend='xdr')
df
```
```
Using cached: nyc/tempo.l2.no2.vertical_column_troposphere_2023-12-18T000000Z_2023-12-18T235959Z.xdr.gz
```
| | Timestamp | LONGITUDE | LATITUDE | no2_vertical_column_troposphere | Longitude_SW | Longitude_SE | Longitude_NW | Longitude_NE | Latitude_SW | Latitude_SE | Latitude_NW | Latitude_NE | time |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-12-18T15:11:00+0000 | -73.316078 | 41.398060 | 3.093594e+15 | -73.281212 | -73.344002 | -73.288191 | -73.351015 | 41.410378 | 41.407287 | 41.388793 | 41.385610 | 2023-12-18 15:11:00+00:00 |
| 1 | 2023-12-18T15:11:00+0000 | -72.758522 | 41.398186 | 7.423907e+13 | -72.723591 | -72.786112 | -72.730680 | -72.793116 | 41.410442 | 41.408224 | 41.389041 | 41.386914 | 2023-12-18 15:11:00+00:00 |
| 2 | 2023-12-18T15:11:00+0000 | -72.820793 | 41.396824 | 4.233557e+14 | -72.786112 | -72.848591 | -72.793116 | -72.855520 | 41.408224 | 41.406334 | 41.386914 | 41.385095 | 2023-12-18 15:11:00+00:00 |
| 3 | 2023-12-18T15:11:00+0000 | -72.883331 | 41.394588 | 1.407843e+15 | -72.848591 | -72.911537 | -72.855520 | -72.918381 | 41.406334 | 41.403631 | 41.385095 | 41.382483 | 2023-12-18 15:11:00+00:00 |
| 4 | 2023-12-18T15:11:00+0000 | -72.946648 | 41.391434 | 3.048500e+15 | -72.911537 | -72.974806 | -72.918381 | -72.981564 | 41.403631 | 41.400612 | 41.382483 | 41.379558 | 2023-12-18 15:11:00+00:00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 890 | 2023-12-18T18:18:00+0000 | -73.764481 | 40.330441 | 5.616886e+15 | -73.730804 | -73.792147 | -73.737051 | -73.798367 | 40.341922 | 40.339574 | 40.321104 | 40.318759 | 2023-12-18 18:18:00+00:00 |
| 891 | 2023-12-18T18:18:00+0000 | -73.826035 | 40.327888 | 4.467097e+15 | -73.792147 | -73.853292 | -73.798367 | -73.859488 | 40.339574 | 40.337360 | 40.318759 | 40.316548 | 2023-12-18 18:18:00+00:00 |
| 892 | 2023-12-18T18:18:00+0000 | -73.886749 | 40.326015 | 8.049984e+15 | -73.853292 | -73.913992 | -73.859488 | -73.920162 | 40.337360 | 40.335357 | 40.316548 | 40.314547 | 2023-12-18 18:18:00+00:00 |
| 893 | 2023-12-18T18:18:00+0000 | -73.947411 | 40.323883 | 1.004033e+16 | -73.913992 | -73.975153 | -73.920162 | -73.981304 | 40.335357 | 40.333200 | 40.314547 | 40.312383 | 2023-12-18 18:18:00+00:00 |
| 894 | 2023-12-18T18:18:00+0000 | -74.009056 | 40.321693 | 8.188616e+15 | -73.975153 | -74.036465 | -73.981304 | -74.042583 | 40.333200 | 40.330881 | 40.312383 | 40.310077 | 2023-12-18 18:18:00+00:00 |

9569 rows × 13 columns
Step 3: Compare to observations
- Make an hourly average product
- Make a simple time-series plot
- Do the same with AirNow to compare (a sketch follows the time-series plot)
```python
# Make an hourly average
hdf = df.groupby(pd.Grouper(key='time', freq='1h')).mean(numeric_only=True)
hdf
```
| time | LONGITUDE | LATITUDE | no2_vertical_column_troposphere | Longitude_SW | Longitude_SE | Longitude_NW | Longitude_NE | Latitude_SW | Latitude_SE | Latitude_NW | Latitude_NE |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2023-12-18 15:00:00+00:00 | -73.283330 | 40.849232 | 4.230446e+15 | -73.249018 | -73.310997 | -73.255643 | -73.317594 | 40.860911 | 40.858619 | 40.839853 | 40.837564 |
| 2023-12-18 16:00:00+00:00 | -73.187944 | 40.852178 | 5.531938e+15 | -73.153566 | -73.215645 | -73.160231 | -73.222283 | 40.863865 | 40.861568 | 40.842798 | 40.840506 |
| 2023-12-18 17:00:00+00:00 | -73.093278 | 40.857026 | 6.386307e+15 | -73.058850 | -73.120997 | -73.065552 | -73.127672 | 40.868752 | 40.866385 | 40.847680 | 40.845316 |
| 2023-12-18 18:00:00+00:00 | -73.111625 | 40.827774 | 5.733243e+15 | -73.077252 | -73.139305 | -73.083933 | -73.145958 | 40.839493 | 40.837118 | 40.818434 | 40.816065 |
```python
# Plot a data column selected from the names above
tempocol = 'no2_vertical_column_troposphere'
ax = hdf[tempocol].plot(ylim=(0, None), ylabel='NO2 [molec/cm2]')
```
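The AirNow comparison from the step list above isn't shown in the cells here. A minimal sketch, assuming RSIG's `airnow.no2` coverage name and that, with `unit_keys=False`, the observation column parses as `no2`:

```python
# Sketch (assumed coverage/column names): AirNow surface NO2 for the
# same day and domain, averaged hourly, plotted as a time series
adf = api.to_dataframe('airnow.no2', unit_keys=False, parse_dates=True)
ahdf = adf.groupby(pd.Grouper(key='time', freq='1h')).mean(numeric_only=True)
ax = ahdf['no2'].plot(ylim=(0, None), ylabel='NO2 [ppb]')
```

The map below also needs a gridded TEMPO dataset `ds`, which is not created in any cell shown here. A minimal sketch, assuming pyrsig's `to_ioapi` regrids TEMPO onto the custom grid implied by `gridfit=True`:

```python
# Assumed step: make a custom L3 (IOAPI-like) TEMPO dataset for mapping
ds = api.to_ioapi(tempokey)
```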
```python
# Choose a column from above; notice that names are truncated, so they can be weird
tempoikey = 'NO2_VERTICAL_CO'
```
```python
# Now plot a map
cno = pycno.cno(ds.crs_proj4)
qm = ds[tempoikey].where(lambda x: x > 0).mean(('TSTEP', 'LAY')).plot()
cno.drawstates(resnum=1)
```
```
/usr/local/lib/python3.11/dist-packages/pycno/__init__.py:538: UserWarning: Downloading: https://www.giss.nasa.gov/tools/panoply/overlays/MWDB_Coasts_NA_1.cnob to /root/.pycno/MWDB_Coasts_NA_1.cnob
  warnings.warn('Downloading: ' + url + ' to ' + str(datapatho))
```
Step 5: Make a surface NO2 map

- Download CMAQ EQUATES surface and columns
- Extract CMAQ to gridded TEMPO
- Calculate a transfer function[^1] (the relation is shown below this list)
- Estimate surface NO2
- Average and make a plot

[^1]: No warranty expressed or implied!
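In symbols, the transfer function is the modeled surface-to-column ratio, and the surface estimate applies it to the observed column (exactly what the code further down computes):

TEMPO_SFC [ppb] = NO2_VERTICAL_CO [molecules/cm2] × (CMAQ_NO2_SFC [ppb] / CMAQ_NO2_COL [molecules/cm2])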
```python
# Get a column and surface estimate from CMAQ
cmaqcolkey = 'cmaq.equates.conus.integrated.NO2_COLUMN'
qids = api.to_ioapi(cmaqcolkey, bdate='2018-12-21')
cmaqsfckey = 'cmaq.equates.conus.aconc.NO2'
qsds = api.to_ioapi(cmaqsfckey, bdate='2018-12-21')
```
Align Grids

To align the grids, we have to convert between Lambert projections. This is a little complicated, but pyrsig gives you all the tools you need.

1. Get 2d x/y for TEMPO L3 cell centroids
2. Get 2d x/y for TEMPO L3 cell centroids on the CMAQ grid
3. Store for later use
4. Pretend the EQUATES data is 2023
```python
# 1. get 2d x/y for TEMPO L3 cell centroids
y, x = xr.broadcast(ds.ROW, ds.COL)
# 2. get 2d x/y for TEMPO L3 cell centroids on the CMAQ grid
dstproj = pyproj.Proj(ds.crs_proj4)
srcproj = pyproj.Proj(qids.crs_proj4)
X, Y = srcproj(*dstproj(x.values, y.values, inverse=True))
# 3. store the result for later use
ds['CMAQX'] = ('COL',), X.mean(0)
ds['CMAQY'] = ('ROW',), Y.mean(1)
# 4. here we pretend that the CMAQ times align with the TEMPO times
ds['CMAQT'] = ('TSTEP',), qsds.TSTEP.values
```
Extract CMAQ to TEMPO custom L3
We'll extract data using the CMAQ coordinates and add the results to the TEMPO dataset.
```python
# Now we extract the CMAQ at the TEMPO locations
# all extractions will output time, y, x data
dims = ('TSTEP', 'ROW', 'COL')
# all extractions use the same coordinates
selopts = dict(TSTEP=ds['CMAQT'], COL=ds['CMAQX'], ROW=ds['CMAQY'], method='nearest')
# 1 atm is the surface
selopts['LAY'] = 1
# Get CMAQ surface NO2 (NO2) and tropospheric column (NO2_COLUMN)
ds['CMAQ_NO2_SFC'] = dims, qsds['NO2'].sel(**selopts).data, {'units': 'ppb'}
ds['CMAQ_NO2_COL'] = dims, qids['NO2_COLUMN'].sel(**selopts).data * 1e15, {'units': 'molec/cm**2'}
```
```python
# Calculate the transfer function
ds['CMAQ_SFC2COL'] = ds['CMAQ_NO2_SFC'] / ds['CMAQ_NO2_COL']
ds['CMAQ_SFC2COL'].attrs.update(units='1')
# Calculate the estimated surface NO2
ds['TEMPO_SFC'] = ds['NO2_VERTICAL_CO'] * ds['CMAQ_SFC2COL']
ds['TEMPO_SFC'].attrs.update(units='ppb')
```
Now Plot TEMPO-based Surface NO2
```python
# Now plot the time average
pds = ds.where(ds['NO2_VERTICAL_CO'] > 0).mean(('TSTEP', 'LAY'), keep_attrs=True)
# Controlling figure canvas use
gskw = dict(left=0.05, right=0.95, bottom=0.05, top=0.95)
fig, axx = plt.subplots(2, 3, figsize=(18, 8), gridspec_kw=gskw)
# Put CMAQ on the top row: columns 0, 1, and 2
qmsfc = pds['CMAQ_NO2_SFC'].plot(ax=axx[0, 0], cmap='viridis')
qmcol = pds['CMAQ_NO2_COL'].plot(ax=axx[0, 1], cmap='cividis')
pds['CMAQ_SFC2COL'].plot(ax=axx[0, 2], cmap='Reds')
# Put TEMPO on the bottom row and use the same color scales as CMAQ
pds['TEMPO_SFC'].plot(ax=axx[1, 0], norm=qmsfc.norm, cmap=qmsfc.cmap)
pds['NO2_VERTICAL_CO'].plot(ax=axx[1, 1], norm=qmcol.norm, cmap=qmcol.cmap)
# Add state overlays to all panels
cno.drawstates(ax=axx, resnum=1)
# Hide the unused axes
axx[1, 2].set(visible=False)
# Add a reminder
_ = fig.text(0.7, 0.25, 'Don\'t look too close.\nRemember, CMAQ is from 2018 and TEMPO is from 2023')
```
Step 6: Adapt other tutorials

- Go to https://barronh.github.io/pyrsig
- Navigate to an example you like
- Copy and paste chunks of code
- Then, run them

Below, I did that for the Pittsburgh Pandora evaluation.
```python
import pyrsig

bbox = (-74.5, 40., -73.5, 41)
cmaqkey = 'cmaq.equates.conus.integrated.NO2_COLUMN'
cmaqcol = 'NO2_COLUMN'
datakey = 'pandora.L2_rnvs3p1_8.nitrogen_dioxide_vertical_column_amount'
datacol = 'nitrogen_dioxide_vertical_column_amount'
# Or use TropOMI or any other NO2 column data
# datakey = 'tropomi.offl.no2.nitrogendioxide_tropospheric_column'

# Get a CMAQ file from RSIG
api = pyrsig.RsigApi(
    bbox=bbox, bdate='2018-07-01T12', edate='2018-07-01T23:59:59'
)
ds = api.to_ioapi(cmaqkey)
# pair_rsigcmaq will match the CMAQ bbox, bdate, and edate
df = pyrsig.cmaq.pair_rsigcmaq(ds, cmaqcol, datakey)
pdf = df.groupby(['time']).mean(numeric_only=True)
z1 = pdf[datacol]
z2 = pdf['CMAQ_' + cmaqcol] * 1e15
ax = z1.plot(marker='+', linestyle='none', label='Pandora')
ax = z2.plot(ax=ax, color='green', label='CMAQ')
ax.legend()
ax.set(ylim=(0, None), ylabel='NO2 [molecules/cm2]', xlabel='Time [UTC]')
ax.figure.savefig('cmaq_pandora.png')
```
Hopefully, we won’t use this…
I archived all the downloads and posted them. If a server is down, we'll just use the archived data.
```python
# in case of emergency:
# !wget -N --no-check-certificate https://gaftp.epa.gov/Air/aqmg/bhenders/tutorial_data/pyrsig_tutorial_2024-05.zip
# !unzip pyrsig_tutorial_2024-05.zip
# !mkdir nyc; cd nyc; unzip ../pyrsig_tutorial_2024-05.zip
```
```python
# How the archive was built:
# import zipfile
# import glob
# paths = glob.glob('*.gz')
# with zipfile.ZipFile('pyrsig_tutorial_2024-05.zip', mode='w') as zf:
#     for p in paths:
#         zf.write(p)
```