3  A deeper dive — looking at the Marathon

Let’s play around with the data a little bit. We will load the data and look at the top 10 marathon performances of all time.

Code
import polars as pl
import altair as alt
from camminapy.plot import altair_theme


# Activate the project's Altair theme before any chart is created.
altair_theme()

# Row selector for the marathon and the finishing time converted from
# seconds to hours (3600 seconds per hour).
marathon_only = pl.col("Event") == "Marathon"
mark_in_hours = (pl.col("Mark [meters or seconds]") / 3600).alias("Hours")

# Keep the first ten marathon rows for each sex.
# NOTE(review): this takes rows in file order, so it is the "top 10" only if
# the CSV is already ordered best-first within each sex — confirm.
df = (
    pl.read_csv("../data/data.csv", separator=";")
    .filter(marathon_only)
    .groupby("Sex")
    .head(10)
    .with_columns(mark_in_hours)
)

# Add the last whitespace-separated token of each competitor name; it is
# used below as the text label next to every point.
labelled = df.with_columns(
    pl.col("Competitor").str.split(" ").list.last().alias("Last Name")
)

# Fixed colour per sex, with the legend and its title suppressed.
sex_colour = (
    alt.Color("Sex:N")
    .scale(domain=["female", "male"], range=["purple", "orange"])
    .title(None)
    .legend(None)
)

# Tooltip lists every column of `df` (so not the derived "Last Name"),
# each encoded as nominal.
tooltip_fields = [f"{c}:N" for c in df.columns]

# Line-with-points chart of time in hours against rank.
chart = (
    alt.Chart(labelled)
    .mark_line(point=True)
    .encode(
        x="Rank:N",
        # Do not force the y axis to start at zero.
        y=alt.Y("Hours:Q").scale(zero=False),
        color=sex_colour,
        tooltip=tooltip_fields,
    )
    .properties(width=550, height=300)
)

# Same encoding drawn as rotated text, offset from the points.
name_labels = chart.mark_text(dy=-10, dx=10, angle=320).encode(text="Last Name:N")

# One facet row per sex, each with its own y scale.
alt.layer(chart, name_labels).facet(row="Sex:N").resolve_scale(y="independent")

We can also look at how world records have progressed over time.

Code
# Finishing mark converted from seconds to hours.
hours_expr = (pl.col("Mark [meters or seconds]") / 3600).alias("Hours")

# True where a row's time equals the running minimum within its
# (Sex, Event) group. Rows are sorted by date first, so this flags every
# performance that matched or beat the best time seen up to that date.
was_record_expr = (
    (pl.col("Hours") == pl.col("Hours").cummin())
    .over("Sex", "Event")
    .alias("Performance has been a WR")
)

# Last whitespace-separated token of the competitor name, used as a label.
last_name_expr = pl.col("Competitor").str.split(" ").list.last().alias("Last Name")

wr = (
    pl.read_csv("../data/data.csv", separator=";", try_parse_dates=True)
    # Chronological order inside each group is required by the running minimum.
    .sort("Sex", "Event", "Date")
    .with_columns(hours_expr)
    .with_columns(was_record_expr)
    # Keep only record-setting rows, and of those only the marathon.
    .filter(
        pl.col("Performance has been a WR") & (pl.col("Event") == "Marathon")
    )
    .with_columns(last_name_expr)
    .sort("Sex", "Hours")
)


# Step chart of the marathon world-record progression over time.
#
# The tooltip is built from the columns of `wr`, the frame being plotted.
# It previously used `df.columns`, a frame left over from the earlier cell,
# which made this cell fail when run on its own and left out the columns
# added to `wr`.
chart = (
    alt.Chart(wr.to_pandas())
    # "step-after" holds each value flat until the next data point.
    .mark_line(point=True, interpolate="step-after")
    .encode(
        x="Date:T",
        # Do not force the y axis to start at zero.
        y=alt.Y("Hours:Q").scale(zero=False),
        # Fixed colour per sex, with the legend and its title suppressed.
        color=alt.Color("Sex:N")
        .scale(domain=["female", "male"], range=["purple", "orange"])
        .title(None)
        .legend(None),
        tooltip=[f"{c}:N" for c in wr.columns],
    )
    .properties(width=550, height=300)
)
# Overlay rotated last-name labels, then draw one facet row per sex with
# independent y scales.
alt.layer(
    chart, chart.mark_text(dy=-10, dx=10, angle=320).encode(text="Last Name:N")
).facet(row="Sex:N").resolve_scale(y="independent")