# Species Migration
# Maps monthly Veery thrush (Catharus fuscescens) occurrences by ecoregion

import os
import pathlib

import geopandas as gpd
import pandas as pd

# Assemble the project data directory underneath the user's home folder
project_parts = ('earth-analytics', 'data', 'species-dist-v2')
data_dir = os.path.join(pathlib.Path.home(), *project_parts)

# Create the whole directory tree; an existing directory is left as-is
os.makedirs(data_dir, exist_ok=True)

# Remote source of the ecoregion boundaries (zipped shapefile)
ecoregions_url = "https://storage.googleapis.com/teow2016/Ecoregions2017.zip"

# Local cache: <data_dir>/wwf_ecoregions/wwf_ecoregions.shp
ecoregions_dir = os.path.join(data_dir, 'wwf_ecoregions')
ecoregions_path = os.path.join(ecoregions_dir, 'wwf_ecoregions.shp')
os.makedirs(ecoregions_dir, exist_ok=True)

# Fetch the boundaries and write the local copy on the first run only
ecoregions_cached = os.path.exists(ecoregions_path)
if not ecoregions_cached:
    ecoregions_gdf = gpd.read_file(ecoregions_url)
    ecoregions_gdf.to_file(ecoregions_path)

# Read the cached ecoregion boundaries from disk
ecoregions_gdf = gpd.read_file(ecoregions_path)

# Give the attribute columns shorter names, then keep only the three
# columns this script works with
ecoregion_column_names = {'ECO_NAME': 'name', 'SHAPE_AREA': 'area'}
ecoregions_gdf = ecoregions_gdf.rename(columns=ecoregion_column_names)
ecoregions_gdf = ecoregions_gdf[['name', 'area', 'geometry']]

# Label the index 'ecoregion' so it lines up with the other data
ecoregions_gdf.index.name = 'ecoregion'

# Quick visual check that the boundaries loaded
ecoregions_gdf.plot(color='skyblue', edgecolor='black')

<Axes: >

# Location of the GBIF occurrence download
gbif_url = (
    "https://github.com/cu-esiil-edu/esiil-learning-portal/releases/download"
    "/data-release/species-distribution-foundations-data.zip")

# Local cache: <data_dir>/gbif_veery/gbif_veery.zip
gbif_dir = os.path.join(data_dir, 'gbif_veery')
gbif_path = os.path.join(gbif_dir, 'gbif_veery.zip')
os.makedirs(gbif_dir, exist_ok=True)

# Fetch and cache the occurrence table on the first run only
if not os.path.exists(gbif_path):
    # Read just the needed columns from the tab-separated download
    gbif_columns = ['gbifID', 'decimalLatitude', 'decimalLongitude', 'month']
    gbif_df = pd.read_csv(
        gbif_url, sep='\t', index_col='gbifID', usecols=gbif_columns)
    # index=False: the gbifID index is not written to the cached copy
    gbif_df.to_csv(gbif_path, index=False)

# Always work from the cached copy, then preview the first rows
gbif_df = pd.read_csv(gbif_path)
gbif_df.head()

# Build one point per observation from its longitude (x) and latitude (y)
observation_points = gpd.points_from_xy(
    gbif_df['decimalLongitude'], gbif_df['decimalLatitude'])

# Wrap the table as a GeoDataFrame, declaring the coordinates as EPSG:4326
gbif_gdf = gpd.GeoDataFrame(
    gbif_df, geometry=observation_points, crs="EPSG:4326")

# Keep only the month and the point geometry
gbif_gdf = gbif_gdf[['month', 'geometry']]
gbif_gdf

# Put the ecoregions in the same CRS as the GBIF observations
ecoregions_matched = ecoregions_gdf.to_crs(gbif_gdf.crs)

# Pair each ecoregion with the observations it contains; the inner join
# keeps only matched ecoregion/observation pairs
ecoregion_observations = ecoregions_matched.sjoin(
    gbif_gdf, how='inner', predicate='contains')

# Keep the observation month and the ecoregion name
gbif_ecoregion_gdf = ecoregion_observations[['month', 'name']]

# Count the observations in every (ecoregion, month) combination
monthly_groups = gbif_ecoregion_gdf.groupby(['ecoregion', 'month'])
occurrence_df = monthly_groups.agg(occurrences=('name', 'count'))

# Keep only combinations with more than 2 observations, treating rarer
# ones as noise (possible misidentification?)
has_enough = occurrence_df['occurrences'] > 2
occurrence_df = occurrence_df[has_enough]

occurrence_df

# Get month names
import calendar

# Libraries for Dynamic mapping
import cartopy.crs as ccrs
import hvplot.pandas
import panel as pn

# Replace the geometry with a simplified version (tolerance 0.05, topology
# not preserved) to speed up processing
simplified_shapes = ecoregions_gdf.simplify(0.05, preserve_topology=False)
ecoregions_gdf.geometry = simplified_shapes

# Reproject to Mercator for mapping
ecoregions_gdf = ecoregions_gdf.to_crs(ccrs.Mercator())

# Smoke-test that the interactive plot renders
ecoregions_gdf.hvplot(crs=ccrs.Mercator(), geo=True)

c:\Users\gpaul\miniconda3\envs\earth-analytics-python\Lib\site-packages\dask\dataframe\__init__.py:42: FutureWarning: 
Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.

  warnings.warn(msg, FutureWarning)

# Attach the monthly occurrence counts to the plotting GeoDataFrame
occurrence_gdf = ecoregions_gdf.join(occurrence_df)

# Fix the map extent up front so it does not change with the slider
xmin, ymin, xmax, ymax = occurrence_gdf.total_bounds

# Month slider: labelled with month names, valued with month numbers 1-12
month_options = dict(zip(calendar.month_name[1:13], range(1, 13)))
slider = pn.widgets.DiscreteSlider(name='month', options=month_options)

# Plot occurrence by ecoregion, one frame per month, over background tiles
plot_options = {
    'c': 'occurrences',
    'groupby': 'month',
    'geo': True,
    'crs': ccrs.Mercator(),
    'tiles': 'CartoLight',
    'title': "Veery thrush (Catharus fuscescens) migration",
    'xlim': (xmin, xmax),
    'ylim': (ymin, ymax),
    'frame_height': 400,
    'colorbar': False,
    'widgets': {'month': slider},
    'widget_location': 'top',
}
migration_plot = occurrence_gdf.hvplot(**plot_options)

# Write a standalone HTML copy with the widget states embedded
migration_plot.save('migration.html', embed=True)

# Display the plot
migration_plot

WARNING:bokeh.core.validation.check:W-1005 (FIXED_SIZING_MODE): 'fixed' sizing mode requires width and height to be set: figure(id='p19302', ...)

BokehModel(combine_events=True, render_bundle={'docs_json': {'2ba42df3-c74d-46bc-b710-9b4bf4cc9725': {'version…

		occurrences
ecoregion	month
16	5	2980
	6	3197
	7	1776
	8	455
	9	142
...	...	...
833	7	293
	8	40
	9	11
839	9	25
839	10	7