Matplotlib / Seaborn Notebook

Matplotlib is Python's base plotting library. Seaborn is built on top of it and handles statistical charts with less code. In real work, use Seaborn for most plots and drop into Matplotlib when you need fine-grained control.

All examples use the same parking dataset from Pandas Notebook: parking_df, payment_df, and station_df, plus aggregated results like monthly and station_summary that you would get from those workflows. For picking the right chart for your data, see Visualization Selection Guide.

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import seaborn as sns
import pandas as pd
import numpy as np

Setup & Style

# One theme call at the top of the notebook; every later chart inherits it.
sns.set_theme(palette='muted', style='whitegrid')

# Default figure size as (width, height) in inches.
plt.rcParams.update({'figure.figsize': (10, 5)})

style='whitegrid' puts light grid lines on a white background, which makes values easy to read without too much visual noise. Set the figure size once so you don't have to repeat it on every chart.

Line Chart — Trend Over Time

# Monthly revenue trend: sum of `amount` per calendar month of `entry_time`.
entry_month = parking_df['entry_time'].dt.to_period('M')
monthly = (
    parking_df
    .groupby(entry_month)['amount']
    .sum()
    .rename_axis('month')
    .reset_index(name='revenue')
)
# Keep a string copy of the Period column to use as the x-axis when plotting.
monthly['month_str'] = monthly['month'].astype(str)

Basic Line

# Plain line chart of revenue per month, one marker per data point.
fig, ax = plt.subplots()
ax.plot(monthly['month_str'], monthly['revenue'], marker='o')
ax.set_title('Monthly Revenue')
ax.set_xlabel('Month')
ax.set_ylabel('Revenue (NTD)')
# Rotate through the Axes object (not plt.xticks) so the snippet still
# targets the right chart when `ax` is not the current axes, e.g. in subplots.
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

With Rolling Average Overlay

# Centered 3-month mean. The first and last month stay NaN because their
# window is incomplete, so the dashed line is one point shorter at each end.
monthly['rolling_3m'] = monthly['revenue'].rolling(3, center=True).mean()

fig, ax = plt.subplots()
ax.plot(monthly['month_str'], monthly['revenue'], marker='o', label='Monthly')
ax.plot(monthly['month_str'], monthly['rolling_3m'], linestyle='--', label='3M Avg')
ax.set_title('Monthly Revenue with 3-Month Rolling Average')
ax.legend()
# Rotate through the Axes object (not plt.xticks) so the snippet still
# targets the right chart when `ax` is not the current axes, e.g. in subplots.
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

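center=True on the rolling mean centers the window, so the smoothed line does not lag behind the real values. The trade-off is that the first and last months have no full window, so the smoothed line has no value there.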

Bar Chart — Category Comparison

Vertical Bar (Seaborn)

# Total revenue per station, largest first, as a two-column frame.
revenue_by_station = parking_df.groupby('station_code')['amount'].sum()
station_rev = revenue_by_station.sort_values(ascending=False).reset_index()

fig, ax = plt.subplots()
sns.barplot(x='station_code', y='amount', data=station_rev, ax=ax)
ax.set(title='Total Revenue by Station', xlabel='Station', ylabel='Revenue (NTD)')
fig.tight_layout()
plt.show()

Horizontal Bar (better when category labels are long)

# Same data drawn horizontally: categories on y, values on x.
fig, ax = plt.subplots()
sns.barplot(x='amount', y='station_code', data=station_rev, orient='h', ax=ax)
ax.set(title='Total Revenue by Station')
fig.tight_layout()
plt.show()

Stacked Bar — Composition by Group

# One row per station, one column per payment method; combinations with
# no rows are filled with 0 so every bar has every segment.
pivot = parking_df.pivot_table(
    values='amount', index='station_code',
    columns='payment_method', aggfunc='sum', fill_value=0
)
# Draw on an explicit Axes so the styling below uses the same
# object-oriented calls as the other charts instead of pyplot's global state.
fig, ax = plt.subplots(figsize=(10, 5))
pivot.plot(kind='bar', stacked=True, ax=ax)
ax.set_title('Revenue by Station and Payment Method')
ax.set_ylabel('Revenue (NTD)')
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

Use .plot() on a DataFrame directly for stacked bar charts. Seaborn's barplot has no stacked mode.

Histogram — One Variable's Distribution

# Histogram of one numeric column with a KDE curve drawn over the bars.
fig, ax = plt.subplots()
sns.histplot(parking_df['amount'], kde=True, bins=30, ax=ax)
ax.set(title='Distribution of Parking Amounts', xlabel='Amount (NTD)')
fig.tight_layout()
plt.show()

kde=True adds a smooth density curve on top. It is helpful for seeing whether the distribution is skewed.

Compare Two Groups

# One histogram per parking type, drawn on the same axes via `hue`.
fig, ax = plt.subplots()
sns.histplot(x='amount', hue='parking_type', data=parking_df,
             kde=True, bins=30, ax=ax)
ax.set(title='Amount Distribution by Parking Type')
fig.tight_layout()
plt.show()

hue splits the chart into overlapping histograms — good for 2–3 groups. More than that becomes hard to read.

Box Plot — Distribution by Group

# One box per parking type summarising the spread of `amount`.
fig, ax = plt.subplots()
sns.boxplot(x='parking_type', y='amount', data=parking_df, ax=ax)
ax.set(title='Amount Distribution by Parking Type')
fig.tight_layout()
plt.show()

Box = IQR (25th–75th percentile), line = median, whiskers = the most extreme points that lie within 1.5× IQR of the box edges, dots = outliers beyond the whiskers.

With Data Points (Strip)

# Box plot first, then the raw observations as small translucent dots on top.
fig, ax = plt.subplots()
by_type = dict(data=parking_df, x='parking_type', y='amount', ax=ax)
sns.boxplot(**by_type)
sns.stripplot(color='black', alpha=0.3, size=2, **by_type)
fig.tight_layout()
plt.show()

Add stripplot on top of boxplot to show the points along with the summary. Helpful when n is small.

Scatter Plot — Link Between Two Numeric Variables

# Duration against amount, coloured by parking type; alpha softens overlap.
fig, ax = plt.subplots()
sns.scatterplot(x='duration_mins', y='amount', hue='parking_type',
                data=parking_df, alpha=0.5, ax=ax)
ax.set(title='Duration vs Amount')
fig.tight_layout()
plt.show()

With Regression Line

# Scatter with a fitted regression line; the points are faded so the line stands out.
fig, ax = plt.subplots()
sns.regplot(x='duration_mins', y='amount', data=parking_df,
            scatter_kws=dict(alpha=0.3), ax=ax)
ax.set(title='Duration vs Amount (with trend line)')
fig.tight_layout()
plt.show()

regplot fits a straight line through the points and shows a confidence band. Use it when you want to see the direction of the link at a glance.

Heatmap — Correlation Matrix or Pivot Table

Correlation Matrix

# Pairwise correlation of every numeric column in parking_df.
numeric_df = parking_df.select_dtypes(include='number')
corr = numeric_df.corr()

fig, ax = plt.subplots(figsize=(8, 6))
# vmin/vmax pin the colour scale to the full correlation range [-1, 1].
sns.heatmap(corr, cmap='coolwarm', vmin=-1, vmax=1,
            annot=True, fmt='.2f', ax=ax)
ax.set(title='Correlation Matrix')
fig.tight_layout()
plt.show()

annot=True prints the value in each cell. fmt='.2f' shows 2 decimal places. vmin/vmax lock the color scale, so -1 is always blue and 1 is always red.

Pivot Heatmap (Revenue by Station × Month)

# Revenue per station (rows) and month number (columns).
# `.dt.month` is the month number only, so the same month from
# different years is summed into one column.
month_number = parking_df['entry_time'].dt.month
pivot = parking_df.pivot_table(
    index='station_code', columns=month_number,
    values='amount', aggfunc='sum'
)

fig, ax = plt.subplots(figsize=(12, 5))
sns.heatmap(pivot, cmap='YlOrRd', annot=True, fmt='.0f', ax=ax)
ax.set(title='Revenue by Station × Month', xlabel='Month')
fig.tight_layout()
plt.show()

Subplots — Several Charts in One Figure

# Two panels side by side in one figure.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
station_ax, amount_ax = axes

# Left panel: revenue per station.
sns.barplot(x='station_code', y='amount', data=station_rev, ax=station_ax)
station_ax.set_title('Revenue by Station')

# Right panel: distribution of parking amounts.
sns.histplot(parking_df['amount'], kde=True, bins=30, ax=amount_ax)
amount_ax.set_title('Amount Distribution')

# Figure-level title placed slightly above the top edge (y > 1).
fig.suptitle('Parking System Overview', fontsize=14, y=1.02)
fig.tight_layout()
plt.show()

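Always pass ax=axes[i] to Seaborn's axes-level functions (barplot, histplot, boxplot, scatterplot, ...) when you use subplots. If you don't, they draw on the current axes — usually the last subplot that was created — so the charts pile up in one panel. Figure-level functions (relplot, catplot, displot) do not take ax; they always create their own figure.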

2×2 Grid

# Four panels in a 2×2 grid; unpack the grid into one name per panel.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(top_left, top_right), (bottom_left, bottom_right) = axes

# Top-left: revenue per station.
sns.barplot(x='station_code', y='amount', data=station_rev, ax=top_left)
top_left.set_title('Revenue by Station')

# Top-right: distribution of parking amounts.
sns.histplot(parking_df['amount'], kde=True, bins=30, ax=top_right)
top_right.set_title('Amount Distribution')

# Bottom-left: amount spread per parking type.
sns.boxplot(x='parking_type', y='amount', data=parking_df, ax=bottom_left)
bottom_left.set_title('Amount by Type')

# Bottom-right: duration against amount.
sns.scatterplot(x='duration_mins', y='amount', data=parking_df,
                alpha=0.3, ax=bottom_right)
bottom_right.set_title('Duration vs Amount')

fig.tight_layout()
plt.show()

Reach subplots with axes[row, col] for 2D grids, and axes[i] for 1D rows.

Styling & Formatting

Axis Labels & Title

# Title, axis labels and rotated x tick labels on an existing Axes `ax`.
ax.set_title('Chart Title', fontsize=14)
ax.set(xlabel='X Label', ylabel='Y Label')
ax.tick_params(axis='x', rotation=45)

Format Y-Axis as Currency

def _format_currency(value, _tick_position):
    """Render a tick value as a dollar amount with thousands separators and no decimals."""
    return f'${value:,.0f}'


# FuncFormatter calls the function with (value, position) for every tick.
ax.yaxis.set_major_formatter(mticker.FuncFormatter(_format_currency))

Format Y-Axis as Percentage

# xmax=1 means a data value of 1 is shown as 100%; decimals=0 drops the fraction.
percent_format = mticker.PercentFormatter(xmax=1, decimals=0)
ax.yaxis.set_major_formatter(percent_format)

Add Value Labels on a Bar Chart

# Label every bar on `ax` with its value, rounded to a whole number.
for bars in ax.containers:
    ax.bar_label(bars, padding=3, fmt='%.0f')

Annotate a Specific Point

# `peak_x` / `peak_y` are assumed to hold the data coordinates of the point to mark.
label_position = (peak_x, peak_y + 5000)   # text sits 5000 y-units above the point
ax.annotate(
    'Peak',
    xy=(peak_x, peak_y),
    xytext=label_position,
    color='red',
    arrowprops={'arrowstyle': '->', 'color': 'red'},
)

Common Color Palettes

Palette Use case
'muted' Default category palette, low contrast
'Set2' Category, colorblind-friendly
'coolwarm' Diverging (positive / negative)
'YlOrRd' One direction (low → high intensity)
'Blues' One color, single direction
# Apply globally: every chart drawn afterwards uses this palette.
sns.set_palette(palette='Set2')
# Apply to one chart only (`...` stands for the usual data/x/y arguments).
# NOTE(review): newer seaborn releases warn when `palette` is passed
# without `hue` — confirm against the installed version.
sns.barplot(..., palette='Set2')

Save Figure

# Write the figure to disk at 150 dpi, cropping to the drawn content.
fig.savefig('output.png', bbox_inches='tight', dpi=150)

bbox_inches='tight' makes sure titles and labels are not cut off at the edges.

Common DA Workflows

1. Monthly Trend Report

# Revenue per calendar month, plus a smoothed series and the
# month-over-month percentage change.
entry_month = parking_df['entry_time'].dt.to_period('M')
monthly = (
    parking_df
    .groupby(entry_month)['amount']
    .sum()
    .rename_axis('month')
    .reset_index(name='revenue')
)
monthly = monthly.assign(
    month_str=monthly['month'].astype(str),
    rolling_3m=monthly['revenue'].rolling(3, center=True).mean(),
    mom_pct=monthly['revenue'].pct_change() * 100,
)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
trend_ax, change_ax = axes

# Left panel: monthly revenue with the 3-month rolling average.
trend_ax.plot(monthly['month_str'], monthly['revenue'], marker='o', label='Monthly')
trend_ax.plot(monthly['month_str'], monthly['rolling_3m'], linestyle='--', label='3M Avg')
trend_ax.set_title('Monthly Revenue')
trend_ax.legend()
trend_ax.tick_params(axis='x', rotation=45)

# Right panel: MoM % change. The first month has no predecessor, so its
# NaN is drawn as a zero-height bar.
mom_change = monthly['mom_pct'].fillna(0)
bar_colors = ['red' if pct < 0 else 'green' for pct in mom_change]
change_ax.bar(monthly['month_str'], mom_change, color=bar_colors)
change_ax.axhline(0, color='black', linewidth=0.8)
change_ax.set_title('Month-over-Month Change (%)')
change_ax.tick_params(axis='x', rotation=45)

fig.tight_layout()
plt.show()

2. Station Performance Dashboard

# Per-station revenue, visit count and average amount, highest revenue first.
per_station = parking_df.groupby('station_code').agg(
    revenue=pd.NamedAgg(column='amount', aggfunc='sum'),
    visits=pd.NamedAgg(column='parking_id', aggfunc='count'),
    avg_amt=pd.NamedAgg(column='amount', aggfunc='mean'),
)
station_summary = per_station.reset_index().sort_values('revenue', ascending=False)

fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# One bar chart per metric, left to right.
panels = [
    ('revenue', 'Revenue by Station'),
    ('visits', 'Visits by Station'),
    ('avg_amt', 'Avg Amount by Station'),
]
for panel_ax, (metric, panel_title) in zip(axes, panels):
    sns.barplot(x='station_code', y=metric, data=station_summary, ax=panel_ax)
    panel_ax.set_title(panel_title)

# Only the revenue panel gets value labels on its bars.
for bars in axes[0].containers:
    axes[0].bar_label(bars, padding=3, fontsize=8, fmt='%.0f')

fig.suptitle('Station Performance Overview', fontsize=14, y=1.02)
fig.tight_layout()
plt.show()

3. EDA Distribution Overview

# One histogram (with KDE curve) per numeric column of parking_df.
numeric_cols = parking_df.select_dtypes(include='number').columns.tolist()

n = len(numeric_cols)
if n == 0:
    # plt.subplots rejects a grid with zero columns, so skip plotting.
    print('No numeric columns to plot.')
else:
    # squeeze=False always returns a 2D array of Axes; without it a single
    # column yields a bare Axes object and indexing it with axes[i] fails.
    fig, axes = plt.subplots(1, n, figsize=(6 * n, 4), squeeze=False)
    axes = axes.ravel()   # flatten the single row so axes[i] works for any n

    for col_ax, col in zip(axes, numeric_cols):
        # Drop missing values before binning.
        sns.histplot(parking_df[col].dropna(), bins=30, kde=True, ax=col_ax)
        col_ax.set_title(f'Distribution: {col}')

    fig.tight_layout()
    plt.show()

One histogram per numeric column. Useful as a first-pass EDA (exploratory data analysis) step to spot skew and outliers before you build a model.

4. Correlation Heatmap + Scatter Matrix

# Both views are built from the numeric columns only.
numeric_df = parking_df.select_dtypes(include='number')

# Correlation heatmap: colour scale pinned to the full [-1, 1] range.
corr = numeric_df.corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, cmap='coolwarm', vmin=-1, vmax=1, annot=True, fmt='.2f', ax=ax)
ax.set(title='Correlation Matrix')
fig.tight_layout()
plt.show()

# Scatter matrix of every numeric pair, with KDE curves on the diagonal.
# scatter_matrix creates its own figure, so the pyplot calls below act on it.
pd.plotting.scatter_matrix(numeric_df, alpha=0.3, figsize=(10, 10), diagonal='kde')
plt.suptitle('Scatter Matrix', y=1.01)
plt.tight_layout()
plt.show()

Run these together at the start of any analysis. The correlation heatmap gives you a numeric overview, and the scatter matrix lets you check which links are real vs. driven by a few outliers.