import os
import pickle
import numpy as np
import pandas as pd

import pygskin

# constants for saving and loading data
year = 2023  # season that is loaded (and cached on disk)
# Cache root: "<current working directory>/data". The components are passed
# separately so os.path.join inserts the separator itself.
path_to_files = os.path.join(os.getcwd(), "data")

# calculation constants
RANDOM_STATE = 42  # seed passed to train_test_split

# plot constants and variables
USA_BOUNDS = [-121, -75, 23, 50]  # map extent handed to GeoAxes.set_extent
POINT_SIZE = 20
POINT_ZORDER = 2
LINE_ZORDER = 1.5  # below POINT_ZORDER so lines are drawn under the points

# One matplotlib color name per conference / cluster; the list is indexed with
# the integer conference id (and with the K-Means cluster label).
conference_colors = [
    "red",
    "blue",
    "green",
    "orange",
    "purple",
    "brown",
    "pink",
    "lime",
    "cyan",
    "magenta",
    "yellow",
    "gray",
    "olive",
    "maroon",
    "navy",
    "teal",
    "gold",
    "darkorange",
    "darkgreen",
    "darkred",
    "darkblue",
    "darkgray",
    "darkcyan",
    "darkmagenta",
    "darkkhaki",
    "darkgoldenrod",
    "darkslategray",
    "darkolivegreen",
    "darkseagreen",
    "darkslateblue",
    "darkturquoise",
    "darkviolet",
    "deeppink",
    "deepskyblue",
    "dimgray",
    "dodgerblue",
    "firebrick",
    "forestgreen",
    "fuchsia",
    "gainsboro",
    "ghostwhite",
    "goldenrod",
    "greenyellow",
    "hotpink",
    "indianred",
]

# Load the season from the on-disk cache when it exists; otherwise fetch it
# from the API and write the cache (season plus one pickle per team).
season_dir = os.path.join(path_to_files, str(year))
season_file = os.path.join(season_dir, f"season_{year}.cfb")

if os.path.exists(season_file):
    print(f"Loading {year} season from file...")
    # NOTE: unpickling executes code embedded in the file; only load cache
    # files that were written by this script.
    with open(season_file, "rb") as f:
        season = pickle.load(f)
    print(f"Loaded {year} season from file.")
else:
    print(f"Loading {year} season from API...")
    season = pygskin.Season.from_cfbd_api(year)
    print(f"Loaded {year} season from API.")
    # create directory for season (including the parent data directory),
    # if it doesn't exist
    os.makedirs(season_dir, exist_ok=True)
    # pickle season
    with open(season_file, "wb") as f:
        pickle.dump(season, f)
    # pickle all Teams in Season
    # NOTE(review): this branch reads `season.teams`, while the rest of the
    # file accesses `season.teams_dict` - confirm both attributes exist.
    for school in season.teams.keys():
        with open(os.path.join(season_dir, f"team_{school}.cfb"), "wb") as f:
            pickle.dump(season.teams[school], f)
    print(f"Saved {year} season to file.")

Loading 2023 season from file...
Loaded 2023 season from file.

# 2024 conference realignment, applied on top of the loaded season.
# Maps destination conference -> schools that move into it.
conference_moves = {
    # Schools moving to ACC: SMU, Stanford, Cal
    "ACC": ["SMU", "Stanford", "California"],
    # Schools moving to Big 12: Arizona State, Arizona, Colorado, Utah
    "Big 12": ["Arizona State", "Arizona", "Colorado", "Utah"],
    # Schools moving to Big Ten: UCLA, USC, Washington, Oregon
    "Big Ten": ["UCLA", "USC", "Washington", "Oregon"],
    # Schools moving to SEC: Texas, Oklahoma
    "SEC": ["Texas", "Oklahoma"],
    # Schools moving to AAC: Army
    "American Athletic": ["Army"],
}
for new_conference, moving_schools in conference_moves.items():
    for moving_school in moving_schools:
        season.teams_dict[moving_school].info["conference"] = new_conference

# Schools moving to C-USA: Kennesaw State
# The lookup key must be "Kennesaw State"; "Kent State" is a different school
# and must keep its conference. Kennesaw State may be missing from the loaded
# season data (TODO confirm); in that case the move is skipped with a warning
# instead of reassigning another school.
try:
    kennesaw_state = season.teams_dict["Kennesaw State"]
except KeyError:
    print("Warning: Kennesaw State not found in season data; its move to Conference USA was skipped.")
else:
    kennesaw_state.info["conference"] = "Conference USA"

# Schools moving up from FCS: Delaware
# Delaware is not part of the loaded season, so a Team is built by hand and
# registered under its school name.
delaware = pygskin.Team(
    school="Delaware",
    year=year,  # same season as the rest of the loaded data
    info={
        "id": 99999,  # presumably a placeholder id - confirm it cannot collide
        "school": "Delaware",
        "mascot": "Blue Hens",
        "abbreviation": "DEL",
        "alt_name_1": "University of Delaware",
        "alt_name_2": "UD",
        "alt_name_3": "Blue Hens",
        "classification": "None",
        "conference": "Conference USA",
        "division": "None",
        "color": "#00539f",
        "alt_color": "#ffd200",
        "logos": [
            "https://a.espncdn.com/i/teamlogos/ncaa/500-dark/48.png"
        ],
        "twitter": "@UDBlueHens",
        "location": {
            "venue_id": None,
            "name": "Delaware Stadium",
            "city": "Newark",
            "state": "DE",
            "zip": "19716",
            "country_code": "US",
            "timezone": "America/New_York",
            "latitude": 39.6617,
            "longitude": -75.7488,
            "elevation": 98.0,
            "capacity": 18500,
            "year_constructed": 1952,
            "grass": True,
            "dome": False,
        }
    },
    wins=0,
    losses=0,
    ties=0,
)
season.teams_dict["Delaware"] = delaware
# Build the analyzer only after all adjustments so it sees the 2024 alignment.
analysis = pygskin.SeasonAnalyzer(season)

import cartopy.mpl.geoaxes
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt

# Map of every school, colored by conference.
# Create a figure with an axes object on which we will plot. Pass the projection to that axes.
fig, ax = plt.subplots(
    figsize=(10, 10),
    subplot_kw=dict(projection=ccrs.LambertConformal(central_longitude=-100, central_latitude=45)),
)
ax: cartopy.mpl.geoaxes.GeoAxes
# Set the extent of the map to the contiguous United States
ax.set_extent(USA_BOUNDS)
# Add state boundaries, country borders, ocean, lakes and land (in that order)
for map_feature in (cfeature.STATES, cfeature.BORDERS, cfeature.OCEAN, cfeature.LAKES, cfeature.LAND):
    ax.add_feature(map_feature)
# Add coastlines to plot
ax.coastlines()

# Add the locations of the schools to the plot with a different color for each conference
for conference in analysis.school_locations["conference"].unique():
    # Filter once per conference and reuse the subset for both coordinates.
    conf_schools = analysis.school_locations[analysis.school_locations["conference"] == conference]
    ax.scatter(
        conf_schools["longitude"],
        conf_schools["latitude"],
        color=conference_colors[conference],
        s=POINT_SIZE,
        transform=ccrs.Geodetic(),
        linewidth=0.5,
        edgecolor="black",
        zorder=POINT_ZORDER,
    )

# Add a title to the plot
ax.set_title("2024 FBS Schools")

# create a legend with the names of the conferences
# (legend_elements is reused by the later map cells)
legend_elements = [
    plt.Line2D(
        [0],
        [0],
        marker="o",
        color="w",
        label=analysis.conferences[conference],
        markerfacecolor=conference_colors[conference],
        markersize=10,
    )
    for conference in analysis.school_locations["conference"].unique()
]
ax.legend(title="Conference", bbox_to_anchor=(1.05, 1), loc="upper left", handles=legend_elements)
plt.show()

import math

def haversine(coord1: tuple[float, float], coord2: tuple[float, float], radius: float = 3958.8) -> float:
    """Returns the great-circle distance between two points using the Haversine formula.

    Args:
        coord1 (tuple[float, float]): Longitude and latitude of the first point, in decimal degrees
        coord2 (tuple[float, float]): Longitude and latitude of the second point, in decimal degrees
        radius (float): Radius of the sphere; the result is in the same unit.
            Defaults to 3958.8, the radius of the Earth in miles.

    Returns:
        float: Distance between the two points (in miles with the default radius)
    """
    # Coordinates in decimal degrees, longitude first (e.g., -79.49, 43.60)
    lon1, lat1 = coord1
    lon2, lat2 = coord2

    # Convert decimal degrees to radians
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    # Haversine formula
    a = math.sin(delta_phi / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda / 2)**2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) raise; clamp it to the valid range.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return radius * c

# Pairwise distances (in miles) between schools of the same conference.
# Every pair appears twice, once in each direction (A->B and B->A).
conf_dist_list = []
for conference in analysis.school_locations["conference"].unique():
    # Filter once per conference instead of once per loop level.
    conf_schools = analysis.school_locations[analysis.school_locations["conference"] == conference]
    for school in conf_schools.itertuples():
        for school_2 in conf_schools.itertuples():
            if school == school_2:  # don't calculate distance between the same school
                continue
            dist = haversine((school.longitude, school.latitude), (school_2.longitude, school_2.latitude))
            conf_dist_list.append([conference, school.school, school_2.school, dist])
conference_distances = pd.DataFrame(conf_dist_list, columns=["conference", "school_1", "school_2", "distance"])

# Per-conference summary statistics, broadcast back onto every row of that
# conference. transform() keeps the original row alignment, so each row gets
# the statistics of its own conference.
distance_by_conference = conference_distances.groupby("conference")["distance"]
conference_distances["max_distance"] = distance_by_conference.transform("max")
conference_distances["min_distance"] = distance_by_conference.transform("min")
conference_distances["avg_distance"] = distance_by_conference.transform("mean")
conference_distances["median_distance"] = distance_by_conference.transform("median")

# Scatter plot with one point per conference: x is the mean and y is the
# median of the pairwise distances between that conference's schools.
fig, ax = plt.subplots()
ax: plt.Axes
for conference in conference_distances["conference"].unique():
    in_conference = conference_distances["conference"] == conference
    conf = conference_distances[in_conference]
    mean = conference_distances[in_conference]["distance"].mean()
    median = conference_distances[in_conference]["distance"].median()
    ax.scatter(
        x=mean,
        y=median,
        label=analysis.conferences[conference],
        color=conference_colors[conference],
    )

# Axis labels, title and legend (legend entries come from the scatter labels)
ax.set_xlabel("Mean Distance (miles)")
ax.set_ylabel("Median Distance (miles)")
ax.set_title("Mean vs. Median Distance Between Schools in a Conference")
ax.legend(title="Conference", bbox_to_anchor=(1.05, 1), loc="upper left")

plt.show()

# For each conference (in order of first appearance in conference_distances),
# record the (school_1, school_2) names of its closest and farthest pair.
closest_pairs = []
farthest_pairs = []
for conference in conference_distances["conference"].unique():
    # Search only within this conference so that an equal distance in another
    # conference can never be picked up by mistake.
    conf_rows = conference_distances[conference_distances["conference"] == conference]
    # idxmin/idxmax return the label of the first row holding the extreme value.
    nearest_row = conf_rows.loc[conf_rows["distance"].idxmin()]
    farthest_row = conf_rows.loc[conf_rows["distance"].idxmax()]
    closest_pairs.append((nearest_row["school_1"], nearest_row["school_2"]))
    farthest_pairs.append((farthest_row["school_1"], farthest_row["school_2"]))

# create a map of the closest pairs of schools in each conference, drawing a line between the two schools
fig, ax = plt.subplots(
    figsize=(10, 10),
    subplot_kw=dict(projection=ccrs.LambertConformal(central_longitude=-100, central_latitude=45)),
)
ax: cartopy.mpl.geoaxes.GeoAxes
ax.set_extent(USA_BOUNDS)
for map_feature in (cfeature.STATES, cfeature.BORDERS, cfeature.OCEAN, cfeature.LAKES, cfeature.LAND):
    ax.add_feature(map_feature)
ax.coastlines()

# Iterate over the conferences that actually have a pair (those present in
# conference_distances) and derive the pair here, so points, line and printed
# names always refer to the same conference.
for conference in conference_distances["conference"].unique():
    conf = conference_distances[conference_distances["conference"] == conference]
    # first row holding the smallest distance within this conference
    closest_pair = conf.loc[conf["distance"].idxmin()]
    school_1 = analysis.school_locations[analysis.school_locations["school"] == closest_pair["school_1"]].iloc[0]
    school_2 = analysis.school_locations[analysis.school_locations["school"] == closest_pair["school_2"]].iloc[0]
    pair_longitudes = [school_1.longitude, school_2.longitude]
    pair_latitudes = [school_1.latitude, school_2.latitude]
    # the two schools ...
    ax.scatter(
        pair_longitudes,
        pair_latitudes,
        color=conference_colors[conference],
        s=POINT_SIZE,
        transform=ccrs.Geodetic(),
        linewidth=0.5,
        edgecolor="black",
        zorder=POINT_ZORDER,
    )
    # ... and the line connecting them (drawn underneath the points)
    ax.plot(
        pair_longitudes,
        pair_latitudes,
        color=conference_colors[conference],
        transform=ccrs.Geodetic(),
        zorder=LINE_ZORDER,
    )
    print(f"Closest pair of schools in {analysis.conferences[conference]}: {school_1.school} and {school_2.school}")

ax.set_title("Closest Pairs of Schools in Each Conference")
# add legend (reuses the per-conference handles built for the first map)
ax.legend(title="Conference", bbox_to_anchor=(1.05, 1), loc="upper left", handles=legend_elements)

plt.show()

Closest pair of schools in Mountain West: Colorado State and Wyoming
Closest pair of schools in Mid-American: Bowling Green and Toledo
Closest pair of schools in SEC: Alabama and Mississippi State
Closest pair of schools in Sun Belt: South Alabama and Southern Mississippi
Closest pair of schools in Big 12: BYU and Utah
Closest pair of schools in American Athletic: Navy and Temple
Closest pair of schools in ACC: Duke and North Carolina
Closest pair of schools in FBS Independents: Connecticut and UMass
Closest pair of schools in Conference USA: New Mexico State and UTEP
Closest pair of schools in Big Ten: UCLA and USC
Closest pair of schools in Pac-12: Oregon State and Washington State

<Figure size 200x200 with 0 Axes>

# create a map of the farthest pairs of schools in each conference, drawing a line between the two schools
fig, ax = plt.subplots(
    figsize=(10, 10),
    subplot_kw=dict(projection=ccrs.LambertConformal(central_longitude=-100, central_latitude=45)),
)
ax: cartopy.mpl.geoaxes.GeoAxes
ax.set_extent(USA_BOUNDS)
for map_feature in (cfeature.STATES, cfeature.BORDERS, cfeature.OCEAN, cfeature.LAKES, cfeature.LAND):
    ax.add_feature(map_feature)
ax.coastlines()

# Iterate over the conferences that actually have a pair (those present in
# conference_distances) and derive the pair here, so points, line and printed
# names always refer to the same conference.
for conference in conference_distances["conference"].unique():
    conf = conference_distances[conference_distances["conference"] == conference]
    # first row holding the largest distance within this conference
    farthest_pair = conf.loc[conf["distance"].idxmax()]
    school_1 = analysis.school_locations[analysis.school_locations["school"] == farthest_pair["school_1"]].iloc[0]
    school_2 = analysis.school_locations[analysis.school_locations["school"] == farthest_pair["school_2"]].iloc[0]
    pair_longitudes = [school_1.longitude, school_2.longitude]
    pair_latitudes = [school_1.latitude, school_2.latitude]
    # the two schools ...
    ax.scatter(
        pair_longitudes,
        pair_latitudes,
        color=conference_colors[conference],
        s=POINT_SIZE,
        transform=ccrs.Geodetic(),
        linewidth=0.5,
        edgecolor="black",
        zorder=POINT_ZORDER,
    )
    # ... and the line connecting them (drawn underneath the points)
    ax.plot(
        pair_longitudes,
        pair_latitudes,
        color=conference_colors[conference],
        transform=ccrs.Geodetic(),
        zorder=LINE_ZORDER,
    )
    print(f"Farthest pair of schools in {analysis.conferences[conference]}: {school_1.school} and {school_2.school}")

ax.set_title("Farthest Pairs of Schools in Each Conference")
# add legend (reuses the per-conference handles built for the first map)
ax.legend(title="Conference", bbox_to_anchor=(1.05, 1), loc="upper left", handles=legend_elements)

plt.show()

Farthest pair of schools in Mountain West: Air Force and Hawai'i
Farthest pair of schools in Mid-American: Buffalo and Northern Illinois
Farthest pair of schools in SEC: South Carolina and Texas
Farthest pair of schools in Sun Belt: Old Dominion and Texas State
Farthest pair of schools in Big 12: UCF and Utah
Farthest pair of schools in American Athletic: Army and UT San Antonio
Farthest pair of schools in ACC: Boston College and Stanford
Farthest pair of schools in FBS Independents: Notre Dame and UMass
Farthest pair of schools in Conference USA: UTEP and Delaware
Farthest pair of schools in Big Ten: Oregon and Rutgers
Farthest pair of schools in Pac-12: Oregon State and Washington State

<Figure size 200x200 with 0 Axes>

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

print("Training KNN model...")

# Preprocess the data. Drop school and conference columns, use conference as target
df = analysis.school_locations.copy()
# remove schools with the conference name "FBS Independents"
df = df[df["conference"] != analysis.conferences.index("FBS Independents")]

# X is expected to hold the longitude and latitude columns; y is the conference id
X = df.drop(columns=["school", "conference"])
y = df["conference"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)
knn = KNeighborsClassifier(n_neighbors=5)
# train the model
knn.fit(X_train, y_train)

# test the model
y_pred = knn.predict(X_test)
# print accuracy
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

# Build the results table from the test rows themselves. X_test keeps the
# original row labels in shuffled order and y_pred follows that same order, so
# selecting the rows by X_test.index keeps every prediction next to the school
# it was made for.
knn_df = df.loc[X_test.index, ["school", "longitude", "latitude", "conference"]].copy()
knn_df["predicted"] = y_pred
knn_df["correct"] = knn_df["conference"] == knn_df["predicted"]
# Sort only after the predictions are attached (for a stable, readable listing).
knn_df = knn_df.sort_index()

# Human-readable copy: conference ids replaced by conference names
printable_knn_df = knn_df.drop(columns=["longitude", "latitude"])
printable_knn_df["conference"] = [analysis.conferences[conference] for conference in printable_knn_df["conference"]]
printable_knn_df["predicted"] = [analysis.conferences[conference] for conference in printable_knn_df["predicted"]]

print(printable_knn_df)

Training KNN model...
Accuracy: 0.25925925925925924
                   school         conference          predicted  correct
4                 Arizona             Big 12       Mid-American    False
10             Ball State       Mid-American            Big Ten    False
11                 Baylor             Big 12                ACC    False
18       Central Michigan       Mid-American                SEC    False
19              Charlotte  American Athletic      Mountain West    False
27          East Carolina  American Athletic     Conference USA    False
28       Eastern Michigan       Mid-American      Mountain West    False
32          Florida State                ACC     Conference USA    False
37           Georgia Tech                ACC           Sun Belt    False
41                Indiana            Big Ten                ACC    False
46                 Kansas             Big 12                SEC    False
56               Marshall           Sun Belt       Mid-American    False
57               Maryland            Big Ten  American Athletic    False
65      Mississippi State                SEC      Mountain West    False
70                 Nevada      Mountain West      Mountain West     True
71             New Mexico      Mountain West  American Athletic    False
82           Old Dominion           Sun Belt                SEC    False
83               Ole Miss                SEC                ACC    False
86             Penn State            Big Ten                SEC    False
95          South Alabama           Sun Belt       Mid-American    False
97   Southern Mississippi           Sun Belt       Mid-American    False
99               Stanford                ACC       Mid-American    False
102                Temple  American Athletic                ACC    False
107            Texas Tech             Big 12                SEC    False
118                  Utah             Big 12                ACC    False
125           Wake Forest                ACC                SEC    False
130         West Virginia             Big 12             Big 12     True

# Map of the KNN test schools: green marker for a correct prediction, red for
# an incorrect one, each labelled with the school's actual conference.
map_projection = ccrs.LambertConformal(central_longitude=-100, central_latitude=45)
fig, ax = plt.subplots(figsize=(10, 10), subplot_kw={"projection": map_projection})
ax: cartopy.mpl.geoaxes.GeoAxes
# Restrict the view to the contiguous United States
ax.set_extent(USA_BOUNDS)
# Base layers, added in this order: states, borders, ocean, lakes, land
for base_layer in (cfeature.STATES, cfeature.BORDERS, cfeature.OCEAN, cfeature.LAKES, cfeature.LAND):
    ax.add_feature(base_layer)
ax.coastlines()

# One marker per test school, colored by whether the prediction was right
for index, row in knn_df.iterrows():
    if row["correct"]:
        marker_color = "green"
    else:
        marker_color = "red"
    ax.scatter(
        row["longitude"],
        row["latitude"],
        transform=ccrs.Geodetic(),
        s=POINT_SIZE,
        color=marker_color,
        linewidth=0.5,
        edgecolor="black",
        zorder=2,
    )

# Label every marker with the name of the school's actual conference
for index, school in knn_df.iterrows():
    conference_name = analysis.conferences[school["conference"]]
    ax.text(
        school["longitude"],
        school["latitude"],
        conference_name,
        transform=ccrs.Geodetic(),
        horizontalalignment="left",
        verticalalignment="bottom",
    )

# Legend explaining the two marker colors
correct_handle = plt.Line2D([0], [0], color="green", lw=4)
incorrect_handle = plt.Line2D([0], [0], color="red", lw=4)
ax.legend(handles=[correct_handle, incorrect_handle], labels=["Correct", "Incorrect"])

ax.set_title("2024 FBS Conference Members - K-Nearest Neighbors Predictions")

plt.show()

from sklearn.cluster import KMeans

k_means_df = analysis.school_locations.copy()
# Features for clustering: everything except the school name and conference id
X = k_means_df.drop(columns=["school", "conference"])

# Acceptable number of schools per cluster (8 to 18 inclusive)
allowed_cluster_size = range(8, 19)
# Upper bound on re-fits so the search always terminates
max_attempts = 1000
min_cluster_size, max_cluster_size = 0, 0
# Re-fit with a fresh random initialization until BOTH the smallest and the
# largest cluster have an allowed size (i.e. every cluster does).
for attempt in range(1, max_attempts + 1):
    # Create an instance of the KMeans class (unseeded, so each attempt differs)
    k_means = KMeans(n_clusters=analysis.num_conferences, n_init='auto', max_iter=50)    # random_state=RANDOM_STATE
    # Fit the data to the model
    k_means.fit(X)
    # find the sizes of the smallest and largest clusters
    conf_members = [len(k_means.labels_[k_means.labels_ == i]) for i in range(analysis.num_conferences)]
    min_cluster_size = min(conf_members)
    max_cluster_size = max(conf_members)
    if min_cluster_size in allowed_cluster_size and max_cluster_size in allowed_cluster_size:
        break
else:
    # No attempt satisfied the size constraint; continue with the last fit.
    print(
        f"Warning: no clustering with all sizes in [{allowed_cluster_size[0]}, {allowed_cluster_size[-1]}] "
        f"found after {max_attempts} attempts; using the last fit "
        f"(min: {min_cluster_size}, max: {max_cluster_size})."
    )

# Get the cluster labels for each data point
labels = k_means.labels_

# Get the cluster centers
centers = k_means.cluster_centers_

# Add the cluster labels to the dataframe
k_means_df["predicted_cluster"] = labels

# Print information about the results
# print(f"Iterations executed: {k_means.n_iter_}")

# Map of the K-Means clusters: schools colored by cluster, plus a circle per
# cluster that reaches from its center to its farthest member.
# Create a figure with an axes object on which we will plot. Pass the projection to that axes.
fig, ax = plt.subplots(
    figsize=(10, 10),
    subplot_kw=dict(projection=ccrs.LambertConformal(central_longitude=-100, central_latitude=45)),
)
ax: cartopy.mpl.geoaxes.GeoAxes
# Set the extent of the map to the contiguous United States
ax.set_extent(USA_BOUNDS)
# Add state boundaries, country borders, ocean, lakes and land (in that order)
for map_feature in (cfeature.STATES, cfeature.BORDERS, cfeature.OCEAN, cfeature.LAKES, cfeature.LAND):
    ax.add_feature(map_feature)
# Add coastlines to plot
ax.coastlines()

# Add all the schools to the plot with a different color for each cluster
for cluster_label in range(analysis.num_conferences):
    cluster_data = k_means_df[k_means_df["predicted_cluster"] == cluster_label]
    ax.scatter(
        cluster_data["longitude"],
        cluster_data["latitude"],
        transform=ccrs.Geodetic(),
        s=POINT_SIZE,
        label=f"Cluster {cluster_label}",
        color=conference_colors[cluster_label],
        linewidth=0.5,
        edgecolor="black",
        zorder=POINT_ZORDER,
    )

# Cluster centers follow the column order of the fitted features, so look the
# coordinate positions up by name instead of assuming an order.
lon_position = X.columns.get_loc("longitude")
lat_position = X.columns.get_loc("latitude")

# add circles around the clusters
for cluster_label in range(analysis.num_conferences):
    cluster_data = k_means_df[k_means_df["predicted_cluster"] == cluster_label]
    if cluster_data.empty:  # nothing to enclose
        continue
    centroid = np.array(
        [centers[cluster_label][lon_position], centers[cluster_label][lat_position]],
        dtype=np.float64,
    )  # longitude, latitude
    member_coords = cluster_data[["longitude", "latitude"]].to_numpy(dtype=np.float64)
    # Radius = largest straight-line distance (in degrees, not miles) from the
    # center to a member of the cluster.
    radius = np.linalg.norm(member_coords - centroid, axis=1).max()
    ax.add_patch(
        plt.Circle(
            (centroid[0], centroid[1]),
            radius,
            edgecolor=conference_colors[cluster_label],
            fill=False,
            linewidth=1.5,
            transform=ccrs.Geodetic(),
            zorder=LINE_ZORDER,
        )
    )

# Add a title to the plot
ax.set_title("2024 FBS Conference Suggestions - K-Means Clusters")
# The legend uses the "Cluster N" labels of the scatter calls above, so colors
# and names match the clusters that are actually drawn.
ax.legend(title="Conference", bbox_to_anchor=(1.05, 1), loc="upper left")
plt.show()

Set Up¶

Imports, Global Variables, and Constants¶

Load Season Data¶

2024 Season Adjustments¶

Data Visualization¶

Mean and Median Distances within Conferences¶

Distance Calculations¶

Plot Mean vs. Median Distances in Conferences¶

Closest and Farthest School Pairs¶

Closest Pairs of Schools in Each Conference¶

Farthest Pairs of Schools in Each Conference¶

K-Nearest Neighbors Model¶

Actual vs. Predicted Clusters¶

K-Means Clustering¶

Visualize the K-Means Clusters¶

Conclusions¶