import pandas as pd
import geopandas as gpd
Load weather data and station metadata
# Load the weather table; the first CSV column supplies the row labels.
weather_data = pd.read_csv('./weather_data.csv', index_col=0)
# Re-label the rows with a DatetimeIndex built from the labels read above.
weather_data = weather_data.set_axis(pd.DatetimeIndex(weather_data.index), axis="index")
# Bare expression so the notebook renders the frame.
weather_data
BROCKVILLE PCC;MEAN_TEMPERATURE | BROCKVILLE PCC;MIN_TEMPERATURE | BROCKVILLE PCC;MAX_TEMPERATURE | BROCKVILLE PCC;TOTAL_PRECIPITATION | POWELL RIVER A;MEAN_TEMPERATURE | POWELL RIVER A;MIN_TEMPERATURE | POWELL RIVER A;MAX_TEMPERATURE | POWELL RIVER A;TOTAL_PRECIPITATION | STONY MOUNTAIN;MEAN_TEMPERATURE | STONY MOUNTAIN;MIN_TEMPERATURE | ... | CHATHAM POINT;MAX_TEMPERATURE | CHATHAM POINT;TOTAL_PRECIPITATION | GREENWOOD A;MEAN_TEMPERATURE | GREENWOOD A;MIN_TEMPERATURE | GREENWOOD A;MAX_TEMPERATURE | GREENWOOD A;TOTAL_PRECIPITATION | BROOKS;MEAN_TEMPERATURE | BROOKS;MIN_TEMPERATURE | BROOKS;MAX_TEMPERATURE | BROOKS;TOTAL_PRECIPITATION | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LOCAL_DATE | |||||||||||||||||||||
1990-01-01 | -5.0 | -10.0 | 0.0 | 0.0 | 3.6 | 2.0 | 5.2 | 0.0 | -13.0 | -25.0 | ... | 3.5 | 1.2 | 2.2 | -5.0 | 9.3 | 8.4 | 0.5 | -2.2 | 3.1 | 0.0 |
1990-01-02 | -4.0 | -9.0 | 1.0 | 0.0 | 0.9 | -2.0 | 3.8 | 1.8 | -10.5 | -13.0 | ... | 4.2 | 13.2 | -3.3 | -8.0 | 1.4 | 0.0 | -8.6 | -15.2 | -1.9 | 0.0 |
1990-01-03 | 0.5 | -4.0 | 5.0 | 0.0 | 3.7 | 1.9 | 5.5 | 3.2 | -15.0 | -18.0 | ... | 5.5 | 20.2 | -1.7 | -6.4 | 3.1 | 0.0 | -7.1 | -15.4 | 1.2 | 0.0 |
1990-01-04 | 4.0 | 2.0 | 6.0 | 2.4 | 5.9 | 3.8 | 8.0 | 1.0 | -21.0 | -25.0 | ... | 6.9 | 11.8 | -0.3 | -7.1 | 6.6 | 3.0 | -12.3 | -15.1 | -9.4 | 0.0 |
1990-01-05 | -3.0 | -4.0 | -2.0 | 0.0 | 5.7 | 2.2 | 9.2 | 14.0 | -18.5 | -25.0 | ... | 8.5 | 34.2 | -0.7 | -8.0 | 6.6 | 0.0 | -10.3 | -15.1 | -5.4 | 0.7 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-28 | -3.0 | -9.0 | 3.0 | 0.0 | -5.0 | -7.0 | -3.0 | 0.6 | -22.0 | -26.5 | ... | -3.0 | 0.0 | -2.4 | -7.2 | 2.5 | 4.2 | -27.3 | -37.3 | -17.4 | 0.2 |
2021-12-29 | -2.5 | -4.0 | -1.0 | 0.0 | -7.5 | -12.0 | -3.0 | 8.0 | -30.0 | -35.0 | ... | -1.5 | 5.0 | 0.2 | -1.5 | 1.8 | 0.4 | -24.0 | -27.7 | -20.2 | 0.0 |
2021-12-30 | -1.0 | -3.0 | 1.0 | 0.0 | -3.3 | -5.0 | -1.5 | 0.0 | -29.5 | -36.0 | ... | 0.0 | 0.0 | 0.4 | -1.5 | 2.2 | 0.0 | -24.9 | -31.3 | -18.6 | 0.2 |
2021-12-31 | NaN | NaN | NaN | NaN | -6.0 | -10.5 | -1.5 | 0.0 | -31.5 | -35.0 | ... | -0.5 | 0.0 | 0.9 | 0.0 | 1.8 | 3.0 | -27.3 | -33.3 | -21.3 | 0.0 |
2022-01-01 | -0.5 | -2.5 | 1.5 | 8.8 | -3.8 | -10.5 | 3.0 | 11.4 | -32.3 | -38.0 | ... | 4.0 | 29.2 | 3.9 | 0.6 | 7.1 | 5.3 | -19.4 | -33.9 | -4.9 | 0.0 |
11689 rows × 428 columns
# Load the per-station metadata table; the first CSV column is used as the row index.
station_metadata = pd.read_csv(
    filepath_or_buffer="./station_metadata.csv",
    index_col=0,
)
# Bare expression so the notebook renders the frame.
station_metadata
x | y | STATION_NAME | FILENAME | PROVINCE_CODE | min_date | max_date | mean_temp_coverage | max_temp_coverage | total_precipitation_coverage | direction_max_gust_coverage | speed_max_gust_coverage | min_rel_humiudity_coverage | max_rel_humiudity_coverage | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -75.666667 | 44.600000 | BROCKVILLE PCC | station_4236_data.csv | ON | 1990-01-01 | 2022-01-01 | 0.998624 | 0.999570 | 0.999398 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
1 | -124.500278 | 49.834169 | POWELL RIVER A | station_327_data.csv | BC | 1990-01-01 | 2022-01-01 | 0.982635 | 0.982981 | 0.999914 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
2 | -97.166667 | 50.116667 | STONY MOUNTAIN | station_3678_data.csv | MB | 1990-01-01 | 2022-01-01 | 0.999825 | 1.000000 | 0.999912 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
3 | -82.933333 | 42.333333 | WINDSOR RIVERSIDE | station_4715_data.csv | ON | 1993-12-01 | 2022-01-01 | 1.000000 | 1.000000 | 0.999902 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
5 | -77.533333 | 44.116667 | TRENTON A | station_5126_data.csv | ON | 1990-01-01 | 2022-01-01 | 0.953286 | 0.953286 | 0.953114 | 0.889711 | 0.899948 | 0.926445 | 0.926531 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
106 | -93.966667 | 48.633333 | BARWICK | station_3932_data.csv | ON | 1990-01-01 | 2022-01-01 | 0.998912 | 0.999275 | 0.999637 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
107 | -110.283333 | 54.416667 | COLD LAKE A | station_2832_data.csv | AB | 1990-01-01 | 2022-01-01 | 0.999914 | 0.999914 | 0.999914 | 0.908731 | 0.927346 | 0.999052 | 0.999224 |
108 | -125.445556 | 50.333194 | CHATHAM POINT | station_153_data.csv | BC | 1990-01-01 | 2022-01-01 | 0.999359 | 0.999680 | 0.999786 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
109 | -64.916667 | 44.983333 | GREENWOOD A | station_6354_data.csv | NS | 1990-01-01 | 2022-01-01 | 0.999743 | 0.999743 | 0.999743 | 0.965780 | 0.966293 | 0.998460 | 0.998802 |
110 | -111.848897 | 50.555297 | BROOKS | station_2180_data.csv | AB | 1990-01-01 | 2022-01-01 | 0.993552 | 0.993937 | 0.974112 | 0.494274 | 0.494274 | 0.344240 | 0.344048 |
109 rows × 14 columns
Visualize locations of included weather stations
# Base map: the 'naturalearth_lowres' dataset resolved through GeoPandas.
# NOTE(review): `gpd.datasets` was deprecated in GeoPandas 0.14 and removed in
# 1.0 — confirm the pinned GeoPandas version before re-running this cell.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# Keep only the rows whose `continent` value is 'North America' and draw them
# as white shapes with black outlines.
north_america = world[world["continent"] == 'North America']
ax = north_america.plot(color='white', edgecolor='black', figsize=(12, 12))
# Overlay every station as a small red dot, using the metadata x/y columns as
# plot coordinates.
ax.scatter(station_metadata["x"], station_metadata["y"], s=5, color='red')
# Bare expression so the notebook shows the resulting axes.
ax
<matplotlib.axes._subplots.AxesSubplot at 0x7f9d32a2c590>
![../../_images/a1bdd2288a71628e0e4ab3162699ed25457b035e654cc95b4f4c9f982f4953ca.png](../../_images/a1bdd2288a71628e0e4ab3162699ed25457b035e654cc95b4f4c9f982f4953ca.png)
Data visualization
Examine one series of each feature type.
# Line plot of the BROCKVILLE PCC mean-temperature column against the date index.
weather_data.loc[:, 'BROCKVILLE PCC;MEAN_TEMPERATURE'].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f9d332e7f50>
![../../_images/96231d727b06dcde5c558be3dfb0d8c66fce1b25d998fe6d5b7a9eeb17706070.png](../../_images/96231d727b06dcde5c558be3dfb0d8c66fce1b25d998fe6d5b7a9eeb17706070.png)
# Line plot of the BROCKVILLE PCC total-precipitation column against the date index.
weather_data.loc[:, 'BROCKVILLE PCC;TOTAL_PRECIPITATION'].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f9d3360da50>
![../../_images/41a11d1b8e1c457be47b4eb1496e9d11a47e6289d4145e7ac1adaaa144b627c3.png](../../_images/41a11d1b8e1c457be47b4eb1496e9d11a47e6289d4145e7ac1adaaa144b627c3.png)
Selecting feature subsets
# Feature whose columns should be pulled out for every station.
feature_name = "MAX_TEMPERATURE"
# Keep every column whose label contains `feature_name` (substring match on
# the column labels, original column order preserved).
weather_data.filter(like=feature_name, axis="columns")
BROCKVILLE PCC;MAX_TEMPERATURE | POWELL RIVER A;MAX_TEMPERATURE | STONY MOUNTAIN;MAX_TEMPERATURE | WINDSOR RIVERSIDE;MAX_TEMPERATURE | TRENTON A;MAX_TEMPERATURE | PEACHLAND;MAX_TEMPERATURE | CASTLEGAR BCHPA DAM;MAX_TEMPERATURE | OUTLOOK PFRA;MAX_TEMPERATURE | SCOTT CDA;MAX_TEMPERATURE | COWAN;MAX_TEMPERATURE | ... | MACTAQUAC PROV PARK;MAX_TEMPERATURE | LEROY;MAX_TEMPERATURE | WHISTLER;MAX_TEMPERATURE | GREEN ISLAND;MAX_TEMPERATURE | QUINSAM RIVER HATCHERY;MAX_TEMPERATURE | BARWICK;MAX_TEMPERATURE | COLD LAKE A;MAX_TEMPERATURE | CHATHAM POINT;MAX_TEMPERATURE | GREENWOOD A;MAX_TEMPERATURE | BROOKS;MAX_TEMPERATURE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LOCAL_DATE | |||||||||||||||||||||
1990-01-01 | 0.0 | 5.2 | -1.0 | NaN | 1.2 | NaN | 3.0 | 1.5 | -0.5 | 1.0 | ... | 3.0 | 0.5 | 2.0 | 4.0 | 5.5 | -4.0 | -1.6 | 3.5 | 9.3 | 3.1 |
1990-01-02 | 1.0 | 3.8 | -8.0 | NaN | 0.8 | NaN | 1.5 | -8.0 | -10.0 | -11.0 | ... | -0.5 | -9.5 | -2.8 | 7.0 | 5.0 | -0.5 | -10.3 | 4.2 | 1.4 | -1.9 |
1990-01-03 | 5.0 | 5.5 | -12.0 | NaN | 5.1 | NaN | 0.5 | -7.5 | -13.0 | -11.5 | ... | 3.5 | -13.5 | -0.1 | 6.5 | 6.0 | -3.0 | -12.2 | 5.5 | 3.1 | 1.2 |
1990-01-04 | 6.0 | 8.0 | -17.0 | NaN | 5.7 | NaN | 2.0 | -13.5 | -17.0 | -17.0 | ... | 5.0 | -20.0 | 2.2 | 8.0 | 9.0 | -11.0 | -18.8 | 6.9 | 6.6 | -9.4 |
1990-01-05 | -2.0 | 9.2 | -12.0 | NaN | 0.8 | NaN | 2.5 | 4.5 | 3.5 | -14.5 | ... | -1.0 | -3.0 | 4.2 | 9.0 | 8.5 | -15.0 | -10.8 | 8.5 | 6.6 | -5.4 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-12-28 | 3.0 | -3.0 | -17.5 | 2.5 | NaN | NaN | -10.0 | -27.8 | -29.5 | -20.0 | ... | -3.0 | -27.0 | -15.5 | -3.0 | NaN | -12.0 | -28.8 | -3.0 | 2.5 | -17.4 |
2021-12-29 | -1.0 | -3.0 | -25.0 | 4.5 | NaN | -11.5 | -10.0 | -26.4 | -28.5 | -21.0 | ... | 3.0 | -29.0 | NaN | 0.5 | NaN | -19.0 | -28.3 | -1.5 | 1.8 | -20.2 |
2021-12-30 | 1.0 | -1.5 | -23.0 | 5.0 | NaN | -9.5 | -7.0 | -28.9 | -29.4 | -24.0 | ... | 1.0 | -27.0 | NaN | -1.5 | 0.5 | -16.5 | -26.0 | 0.0 | 2.2 | -18.6 |
2021-12-31 | NaN | -1.5 | -28.0 | 6.5 | NaN | -10.5 | -9.0 | -29.9 | -30.3 | -19.0 | ... | 2.5 | -28.0 | -10.5 | 4.5 | -2.0 | -21.0 | -29.0 | -0.5 | 1.8 | -21.3 |
2022-01-01 | 1.5 | 3.0 | -26.5 | 4.0 | NaN | -4.5 | -9.0 | -22.6 | -20.1 | -17.0 | ... | NaN | -22.0 | NaN | 6.0 | 3.0 | -20.5 | -22.1 | 4.0 | 7.1 | -4.9 |
11689 rows × 107 columns
Selecting location subsets
# Station whose columns should be pulled out for every feature.
station_name = "BROCKVILLE PCC"
# Column labels have the form "<station>;<feature>". Match the station part up
# to the ";" delimiter rather than testing for a substring, so that a station
# whose name merely contains `station_name` is not selected as well.
station_prefix = f"{station_name};"
weather_data[[col for col in weather_data.columns if col.startswith(station_prefix)]]
BROCKVILLE PCC;MEAN_TEMPERATURE | BROCKVILLE PCC;MIN_TEMPERATURE | BROCKVILLE PCC;MAX_TEMPERATURE | BROCKVILLE PCC;TOTAL_PRECIPITATION | |
---|---|---|---|---|
LOCAL_DATE | ||||
1990-01-01 | -5.0 | -10.0 | 0.0 | 0.0 |
1990-01-02 | -4.0 | -9.0 | 1.0 | 0.0 |
1990-01-03 | 0.5 | -4.0 | 5.0 | 0.0 |
1990-01-04 | 4.0 | 2.0 | 6.0 | 2.4 |
1990-01-05 | -3.0 | -4.0 | -2.0 | 0.0 |
... | ... | ... | ... | ... |
2021-12-28 | -3.0 | -9.0 | 3.0 | 0.0 |
2021-12-29 | -2.5 | -4.0 | -1.0 | 0.0 |
2021-12-30 | -1.0 | -3.0 | 1.0 | 0.0 |
2021-12-31 | NaN | NaN | NaN | NaN |
2022-01-01 | -0.5 | -2.5 | 1.5 | 8.8 |
11689 rows × 4 columns