# -*- coding: utf-8 -*-
"""EPS vs ROE 台股五分位選股回測。

對應文章:
  https://finlab.finance/blog/eps-vs-roe-stock-selection

執行方式(finlab 會自動引導登入):
  pip install finlab
  python strategy.py

回測設計:
  - 回測區間:2013-01-01 ~ 2026-06-09
  - 股票池:收盤價 > 10 元、20 日均量 > 100 張,且近四季因子有值
  - 因子 A:近四季每股盈餘合計(EPS TTM)
  - 因子 B:近四季 ROE 合計(ROE TTM)
  - 於財報公告截止日把池內股票按因子排序切五組,各組等權持有
  - 交易成本:finlab sim() 台股預設(手續費 0.1425%(未打折) + 證交稅 0.3%)
  - 基準:0050 還原價(etl:adj_close)買進持有,純指數算術、不含成本

輸出:
  - 終端機印出十個分位組合與 0050 的績效數字、分段年化、滾動三年報酬差
  - 在目前資料夾存出文章中的五張圖表(PNG):
      chart_quintile_cagr_bars.png   分位年化報酬長條圖
      chart_eps_quintile_curves.png  EPS 五分位淨值曲線
      chart_roe_quintile_curves.png  ROE 五分位淨值曲線
      chart_head_to_head.png         最高分位 vs 0050 淨值與回撤
      chart_rolling_3y_diff.png      滾動三年年化報酬差

投資警語:本程式僅供量化研究與教學用途,過去績效不代表未來表現,
不構成任何投資建議;實際交易前請自行評估風險、滑價與交易容量。
"""
import matplotlib

matplotlib.use("Agg")  # 不開視窗,直接把圖存成檔案

import matplotlib.pyplot as plt
import pandas as pd

from finlab import data
from finlab.backtest import sim

# Backtest window as ISO date strings. Both bounds are applied inclusively
# by the code below (``>= START`` and ``<= END``).
START = "2013-01-01"
END = "2026-06-09"


def cap(df):
    """Return only the rows of ``df`` whose index is ``<= END``.

    Truncating every dataset at the fixed backtest end keeps reruns
    reproducible regardless of when the script is executed.

    NOTE(review): the comparison is performed by the index itself, so a
    non-datetime index (e.g. plain strings) is compared using that type's
    own ordering rather than as dates.
    """
    keep = df.index <= END
    return df[keep]


# ---------- Load price/volume data and build the daily stock universe ----------
close = cap(data.get("price:收盤價"))
volume = cap(data.get("price:成交股數"))

# A stock is in the universe on a given day when its close is above 10 and
# its 20-row rolling mean volume is above 100,000 (the "100 lots" threshold
# described in the module docstring).
price_ok = close > 10
liquid = volume.rolling(20).mean() > 100_000
pool_daily = price_ok & liquid

# ---------- Factors: trailing-four-quarter sums aligned to filing deadlines ----------
# The raw quarterly tables are deliberately NOT passed through cap(): their
# index is a quarter label such as "2024-Q3", and cap() compares the index
# with the date string END. For string labels that is a lexicographic
# comparison ("2026-Q1" > "2026-06-09" because "Q" sorts after "0"), which
# would silently drop every quarter of END's year even when its filing
# deadline falls on or before END. The cap is therefore applied exactly once,
# after the index has been converted to dates.
eps_quarterly = data.get("financial_statement:每股盈餘")
roe_quarterly = data.get("fundamental_features:ROE稅後")

# rolling(4).sum() is the trailing-four-quarter total. index_str_to_date()
# replaces the quarter labels with the statutory filing deadline (per the
# original author's note), so the backtest only uses figures that were
# already publishable on each date and avoids look-ahead bias. Because
# rolling() only looks backwards, capping after the conversion yields the
# same values for every retained row.
eps_ttm = cap(eps_quarterly.rolling(4).sum().index_str_to_date())
roe_ttm = cap(roe_quarterly.rolling(4).sum().index_str_to_date())


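# Robustness check (the article's "穩健性檢查" section): inside the factor loop
# below, right after pool_quarterly is built, additionally apply
#   equity = data.get("financial_statement:股東權益總額").index_str_to_date()
#   pool_quarterly = pool_quarterly & (equity.reindex(...) > 0)
# to exclude companies with negative shareholders' equity, then rerun the ROE
# quintiles. According to the article, all five groups come out identical to
# the original run.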


def quintile_positions(factor, pool_quarterly):
    """Split the in-pool stocks of each row into five factor-ranked groups.

    Values outside ``pool_quarterly`` are masked out before ranking, then each
    row is ranked as a percentile across columns. Group ``Qk`` holds the
    stocks whose percentile rank lies in ``((k - 1) / 5, k / 5]``, so Q1 is
    the lowest bucket and Q5 the highest.

    Returns:
        dict mapping "Q1".."Q5" (in that order) to a boolean frame shaped
        like ``factor``; masked or missing entries are False in every group.
    """
    pct_rank = factor.where(pool_quarterly).rank(axis=1, pct=True)
    return {
        f"Q{k}": (pct_rank > (k - 1) / 5) & (pct_rank <= k / 5)
        for k in range(1, 6)
    }


def clip_creturn(creturn):
    """Keep only the part of an equity curve between START and END (inclusive).

    Per the original author's note, the curve returned by ``sim()`` can extend
    to the day the script is run, so it is trimmed on both ends before any
    statistics are computed.
    """
    dates = creturn.index
    inside_window = (dates >= START) & (dates <= END)
    return creturn[inside_window]


# ---------- Run the five quintile backtests for EPS and for ROE ----------
curves = {}  # equity curve per portfolio; reused below for segment/rolling stats and charts
cagr_pct = {}  # CAGR in percent per portfolio; shown in chart titles and bars

for factor_name, factor in (("EPS", eps_ttm), ("ROE", roe_ttm)):
    # Bring the daily universe mask onto the factor's dates, carrying the
    # most recent earlier daily row forward.
    pool_quarterly = pool_daily.reindex(factor.index, method="ffill")

    for group_name, position in quintile_positions(factor, pool_quarterly).items():
        label = f"{factor_name} {group_name}"
        position = position[position.index >= START]
        report = sim(position, resample=None, upload=False)

        # Every statistic is plain arithmetic on the clipped equity curve,
        # the same way the 0050 benchmark is measured.
        curve = clip_creturn(report.creturn)
        first_value, last_value = curve.iloc[0], curve.iloc[-1]
        total = last_value / first_value - 1
        curve_years = (curve.index[-1] - curve.index[0]).days / 365.25
        group_cagr = ((1 + total) ** (1 / curve_years) - 1) * 100

        daily_returns = curve.pct_change().dropna()
        sharpe = daily_returns.mean() / daily_returns.std() * 252 ** 0.5
        max_drawdown = (curve / curve.cummax() - 1).min() * 100

        curves[label] = curve
        cagr_pct[label] = group_cagr
        print(
            f"{label}:"
            f" 年化 {group_cagr:.2f}%"
            f" 日夏普 {sharpe:.2f}"
            f" 最大回撤 {max_drawdown:.2f}%"
        )

# ---------- 0050 benchmark (buy-and-hold on adjusted close, plain index arithmetic) ----------
adj_close = cap(data.get("etl:adj_close"))
benchmark_raw = adj_close["0050"]
benchmark = benchmark_raw[benchmark_raw.index >= START].dropna()

first_price, last_price = benchmark.iloc[0], benchmark.iloc[-1]
total_return = last_price / first_price - 1
years = (benchmark.index[-1] - benchmark.index[0]).days / 365.25
cagr = (1 + total_return) ** (1 / years) - 1
print(f"0050 含息:總報酬 {total_return * 100:.1f}% 年化 {cagr * 100:.2f}%")

# Store the benchmark normalized to 1 at its first observation so it is
# directly comparable with the strategy curves.
curves["0050"] = benchmark / first_price
cagr_pct["0050"] = cagr * 100


# ---------- 分段年化報酬(穩健性檢查) ----------
def annualized_return(curve, start, end):
    """Return the annualized return (in percent) of ``curve`` within a window.

    The curve is restricted to index values between ``start`` and ``end``
    (both inclusive) and missing values are dropped. The growth between the
    first and last remaining observation is annualized over the calendar
    time between them, using 365.25 days per year.
    """
    inside = (curve.index >= start) & (curve.index <= end)
    window = curve[inside].dropna()
    first_date, last_date = window.index[0], window.index[-1]
    seg_years = (last_date - first_date).days / 365.25
    growth = float(window.iloc[-1] / window.iloc[0])
    return (growth ** (1 / seg_years) - 1) * 100


# Sub-periods (inclusive start/end strings) used for the segment breakdown.
SEGMENTS = [
    ("2013-01-01", "2017-12-31"),
    ("2018-01-01", "2021-12-31"),
    ("2022-01-01", END),
]
print("\n分段年化報酬:")
for seg_start, seg_end in SEGMENTS:
    # Same window for the two top quintiles and the benchmark.
    eps_ann, roe_ann, bench_ann = (
        annualized_return(curves[key], seg_start, seg_end)
        for key in ("EPS Q5", "ROE Q5", "0050")
    )
    print(
        f"{seg_start[:7]}~{seg_end[:7]}:"
        f" EPS Q5 {eps_ann:.2f}% / ROE Q5 {roe_ann:.2f}% / 0050 {bench_ann:.2f}%"
    )

# ---------- Rolling 3-year annualized return gap (ROE Q5 − EPS Q5) ----------
WINDOW = 756  # roughly three years of trading days

roe_curve = curves["ROE Q5"].dropna()
# Put the EPS curve on the ROE curve's dates so the two can be subtracted.
eps_curve = curves["EPS Q5"].reindex(roe_curve.index, method="ffill")

# Growth over WINDOW rows, annualized with 252 trading days per year.
annualize_power = 252 / WINDOW
rolling_roe = (roe_curve / roe_curve.shift(WINDOW)) ** annualize_power - 1
rolling_eps = (eps_curve / eps_curve.shift(WINDOW)) ** annualize_power - 1
rolling_diff = ((rolling_roe - rolling_eps) * 100).dropna()

# Share of windows (in percent) in which ROE Q5 is ahead of EPS Q5.
roe_win_pct = (rolling_diff > 0).mean() * 100

diff_mean = rolling_diff.mean()
diff_min = rolling_diff.min()
diff_max = rolling_diff.max()
print("\n滾動 3 年年化報酬差(ROE Q5 − EPS Q5,百分點):")
print(f"  ROE 領先的窗口比例:{roe_win_pct:.1f}%")
print(f"  平均差:{diff_mean:.2f} / 最小:{diff_min:.2f} / 最大:{diff_max:.2f}")

# ---------- Charts: reproduce the five figures from the article ----------
# CJK-capable sans-serif fonts in priority order. The first three are the
# original macOS choices and keep their priority; the remaining entries are
# fallbacks so the Chinese titles and labels also render on Windows and
# Linux instead of showing missing-glyph boxes.
plt.rcParams["font.sans-serif"] = [
    "Heiti TC",
    "PingFang HK",
    "Arial Unicode MS",
    "Microsoft JhengHei",
    "Noto Sans CJK TC",
    "Noto Sans CJK JP",
    "Noto Sans TC",
    "WenQuanYi Zen Hei",
]
# Draw minus signs with the ASCII hyphen rather than U+2212.
plt.rcParams["axes.unicode_minus"] = False

# Quintile keys and their colors, ordered from Q1 (lowest) to Q5 (highest).
QUINTILES = ["Q1", "Q2", "Q3", "Q4", "Q5"]
QUINTILE_COLORS = ["#CBD5E1", "#94A3B8", "#64748B", "#F59E0B", "#DC2626"]
# Line/fill colors for the EPS portfolio, the ROE portfolio and the benchmark.
COLOR_EPS = "#EF4444"
COLOR_ROE = "#2563EB"
COLOR_BENCH = "#9CA3AF"


def style_axes(ax):
    """Apply the shared chart look: a faint grid and no top/right frame lines."""
    ax.grid(True, alpha=0.22)
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)


def save_figure(fig, filename):
    """Finalize ``fig`` and write it to ``filename``.

    Applies ``tight_layout``, saves the figure, closes it through
    ``plt.close`` and prints a confirmation line containing the file name.
    """
    fig.tight_layout()
    fig.savefig(filename)
    plt.close(fig)  # the figure is not needed once it has been saved
    print(f"已存圖:{filename}")


def plot_quintile_cagr_bars():
    """Figure 1: side-by-side CAGR bars per quintile for EPS and ROE.

    Each panel shows the five quintile CAGRs from ``cagr_pct`` and a dashed
    horizontal line at the 0050 benchmark CAGR. The figure is saved as
    ``chart_quintile_cagr_bars.png``.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 6.75), dpi=100)
    bar_positions = range(5)

    for factor_name, ax in zip(["EPS", "ROE"], axes):
        bar_heights = [cagr_pct[f"{factor_name} {quintile}"] for quintile in QUINTILES]
        ax.bar(bar_positions, bar_heights, color=QUINTILE_COLORS)

        # Benchmark reference line.
        ax.axhline(
            cagr_pct["0050"],
            color=COLOR_BENCH,
            linestyle="--",
            linewidth=1.5,
            label=f"0050 年化 {cagr_pct['0050']:.2f}%",
        )

        ax.set_title(f"{factor_name} 近四季合計五分位")
        ax.set_xticks(bar_positions)
        ax.set_xticklabels(["Q1 最低", "Q2", "Q3", "Q4", "Q5 最高"])
        ax.legend(loc="best")
        style_axes(ax)

    # Only the left panel carries the y-axis label.
    axes[0].set_ylabel("年化報酬 %")

    headline = (
        f"EPS vs ROE 五分位選股年化報酬(2013~2026,含交易成本):"
        f"ROE Q5 {cagr_pct['ROE Q5']:.2f}% vs EPS Q5 {cagr_pct['EPS Q5']:.2f}%"
    )
    fig.suptitle(headline, fontsize=14, fontweight="bold")
    save_figure(fig, "chart_quintile_cagr_bars.png")


def plot_quintile_curves(factor_name, filename):
    """Figures 2 and 3: equity curves of one factor's five quintiles.

    Draws the Q1..Q5 curves of ``factor_name`` from ``curves`` on a log-scaled
    y-axis, overlays the 0050 benchmark as a dashed line, and saves the
    figure to ``filename``.
    """
    fig, ax = plt.subplots(figsize=(12, 6.75), dpi=100)

    for quintile, color in zip(QUINTILES, QUINTILE_COLORS):
        series_label = f"{factor_name} {quintile}"
        curve = curves[series_label].dropna()
        ax.plot(curve.index, curve.values, label=series_label, color=color, linewidth=1.8)

    # Benchmark overlay.
    benchmark_curve = curves["0050"]
    ax.plot(
        benchmark_curve.index,
        benchmark_curve.values,
        label="0050 含息",
        color="#111827",
        linewidth=2.2,
        linestyle="--",
    )

    ax.set_yscale("log")
    chart_title = f"{factor_name} 近四季合計五分位淨值曲線(2013~2026,對數座標)"
    ax.set_title(chart_title, fontsize=15, fontweight="bold")
    ax.set_ylabel("淨值(起點=1,對數)")
    ax.legend(loc="upper left")
    style_axes(ax)
    save_figure(fig, filename)


def plot_head_to_head():
    """Figure 4: ROE Q5, EPS Q5 and 0050 equity curves (top) and drawdowns (bottom).

    The two panels share the x-axis; the top panel uses a log-scaled y-axis.
    The figure is saved as ``chart_head_to_head.png``.
    """
    fig, (ax_curve, ax_drawdown) = plt.subplots(
        2,
        1,
        figsize=(12, 6.75),
        dpi=100,
        sharex=True,
        gridspec_kw={"height_ratios": [2.2, 1]},
    )

    # (key in `curves`, legend label, color, line width of the equity curve)
    series_specs = (
        ("ROE Q5", "ROE Q5", COLOR_ROE, 2.4),
        ("EPS Q5", "EPS Q5", COLOR_EPS, 2.0),
        ("0050", "0050 含息", COLOR_BENCH, 2.0),
    )
    for key, label, color, linewidth in series_specs:
        curve = curves[key].dropna()
        ax_curve.plot(curve.index, curve.values, label=label, color=color, linewidth=linewidth)

        # Drawdown relative to the running peak, plotted in percent.
        drawdown = curve / curve.cummax() - 1
        ax_drawdown.plot(drawdown.index, drawdown.values * 100, color=color, linewidth=1.5)

    ax_curve.set_yscale("log")
    chart_title = (
        f"ROE 最高分位 vs EPS 最高分位 vs 0050(2013~2026):"
        f"年化 {cagr_pct['ROE Q5']:.2f}% / {cagr_pct['EPS Q5']:.2f}%"
        f" / {cagr_pct['0050']:.2f}%"
    )
    ax_curve.set_title(chart_title, fontsize=14, fontweight="bold")
    ax_curve.set_ylabel("淨值(對數)")
    ax_curve.legend(loc="upper left")
    ax_drawdown.set_ylabel("回撤 %")
    for panel in (ax_curve, ax_drawdown):
        style_axes(panel)
    save_figure(fig, "chart_head_to_head.png")


def plot_rolling_3y_diff():
    """Figure 5: rolling 3-year annualized return gap between ROE Q5 and EPS Q5.

    Areas where the gap is non-negative are shaded in the ROE color and
    negative areas in the EPS color. The figure is saved as
    ``chart_rolling_3y_diff.png``.
    """
    dates = rolling_diff.index
    gap = rolling_diff.values

    fig, ax = plt.subplots(figsize=(12, 6.75), dpi=100)
    # Shade the non-negative region first, then the negative one.
    for region, color in ((gap >= 0, COLOR_ROE), (gap < 0, COLOR_EPS)):
        ax.fill_between(dates, gap, 0, where=region, color=color, alpha=0.3)
    ax.plot(dates, gap, color="#111827", linewidth=1.6)
    ax.axhline(0, color="#6B7280", linewidth=1)

    chart_title = (
        f"滾動 3 年年化報酬差:ROE Q5 − EPS Q5(正值 = ROE 領先,"
        f"{roe_win_pct:.1f}% 的時間為正)"
    )
    ax.set_title(chart_title, fontsize=14, fontweight="bold")
    ax.set_ylabel("年化報酬差(百分點)")
    style_axes(ax)
    save_figure(fig, "chart_rolling_3y_diff.png")


# Blank line between the printed statistics and the "saved chart" messages,
# then render the five charts in article order.
print()
plot_quintile_cagr_bars()
for factor_name, filename in (
    ("EPS", "chart_eps_quintile_curves.png"),
    ("ROE", "chart_roe_quintile_curves.png"),
):
    plot_quintile_curves(factor_name, filename)
plot_head_to_head()
plot_rolling_3y_diff()
