# 台股選股因子實測(2018~) — 本文「五單因子 vs 0050」所有數字的可追溯來源
# 環境:finlab(conda)。對照基準 0050 一律用 etl:adj_close(含息,品質引擎鐵則)。
# 跑法:conda activate finlab && python factor-comparison.py
import warnings; warnings.filterwarnings("ignore")
import numpy as np, pandas as pd
from finlab import data
from finlab.backtest import sim

# --- Input data (finlab dataset keys are runtime strings; keep them verbatim) ---
# Closing price ('price:收盤價'); its index is the calendar the other frames are aligned to.
close = data.get('price:收盤價')
# Adjusted close; the file header states this is the dividend-inclusive series
# that must be used for the 0050 benchmark.
adj   = data.get('etl:adj_close')
# Valuation ratios (dataset keys read "P/E ratio" and "price-to-book ratio").
pe    = data.get('price_earning_ratio:本益比')
pb    = data.get('price_earning_ratio:股價淨值比')
# After-tax ROE: index_str_to_date() is a finlab helper (presumably converts the
# string period index to dates — confirm against finlab docs); the result is
# then forward-filled onto close's index.
roe   = data.get('fundamental_features:ROE稅後').index_str_to_date().reindex(close.index, method='ffill')
# Monthly revenue change vs. the same month last year (%), forward-filled onto
# close's index.
rev   = data.get('monthly_revenue:去年同月增減(%)').reindex(close.index, method='ffill')
# Turnover proxy: close x 'price:成交股數' (shares traded), averaged over a
# rolling window of 60 rows.
amount = (close * data.get('price:成交股數')).rolling(60).mean()
# Row-wise percentile rank across columns; True for the upper half.
liquid = amount.rank(axis=1, pct=True) > 0.5   # top 50% by trailing 60-day turnover
# First date (inclusive) of every backtest / benchmark window below.
START = '2018-01-01'

# 0050 dividend-inclusive buy & hold (the benchmark). A monthly Sortino ratio is
# computed as well (original note: same convention as the strategies).
e = adj['0050'].dropna().loc[lambda px: px.index >= START]

# Annualised growth over the calendar span actually covered by the series.
yrs = (e.index[-1] - e.index[0]).days / 365.25
cagr0050 = (e.iloc[-1] / e.iloc[0]) ** (1 / yrs) - 1

# Daily simple returns -> Sharpe annualised with sqrt(252); no risk-free rate.
ret0050 = e.pct_change().dropna()
sharpe0050 = (ret0050.mean() / ret0050.std()) * (252 ** 0.5)

# Deepest drop from the running peak.
mdd0050 = e.div(e.cummax()).sub(1).min()

# Month-end returns; downside risk is the std of the negative months only,
# annualised with sqrt(12).
rm0 = e.resample('M').last().pct_change().dropna()
sortino_m0050 = (rm0.mean() / rm0.loc[rm0.lt(0)].std()) * (12 ** 0.5)

def topN(df, n=30, largest=True, lo=None, hi=None, liq=False):
    """Return a boolean frame flagging, per row, the entries ranked within ``n``.

    Args:
        df: Factor values; converted to a float ``pandas.DataFrame``.
        n: Rank cut-off; an entry is selected when its row-wise rank is <= n.
        largest: If True the largest values get the best ranks, otherwise
            the smallest values do.
        lo: Optional exclusive lower bound; values not strictly above it are
            blanked to NaN before ranking.
        hi: Optional exclusive upper bound; values not strictly below it are
            blanked to NaN before ranking.
        liq: If True, additionally blank every entry where the module-level
            ``liquid`` mask is not True.

    Returns:
        Boolean DataFrame shaped like the input. NaN entries receive no rank
        and are therefore never selected. Ranking uses pandas' default
        tie handling (average rank), so ties around the cut-off can make a
        row select slightly more or fewer than ``n`` entries.
    """
    scores = pd.DataFrame(df).astype(float)

    # Each optional filter turns disqualified entries into NaN.
    if lo is not None:
        scores = scores.where(scores.gt(lo))
    if hi is not None:
        scores = scores.where(scores.lt(hi))
    if liq:
        scores = scores.where(liquid)

    # Rank across columns within each row, best value first.
    row_ranks = scores.rank(axis=1, ascending=not largest)
    return row_ranks.le(n)

def run(name, pos):
    """Backtest a position frame from START onward and print a one-line summary.

    Args:
        name: Label printed at the start of the line (left-aligned, padded
            to 10 characters).
        pos: Position frame indexed by date; rows before START are dropped
            before it is handed to finlab's ``sim`` with ``resample='M'``
            and ``upload=False``.

    Prints CAGR, daily Sharpe and max drawdown taken from ``get_stats()``,
    each with four decimals. Returns None.
    """
    in_window = pos[pos.index >= START]
    report = sim(in_window, resample='M', upload=False)
    stats = report.get_stats()
    print(
        f"{name:10s} CAGR={stats['cagr']:.4f} "
        f"Sharpe={stats.get('daily_sharpe'):.4f} "
        f"MDD={stats['max_drawdown']:.4f}"
    )

# Banner: single factors, 30 names each, monthly rebalancing, 2018 onward.
print("== 單因子(各選 30 檔、月頻換股、2018~)==")

# Value: lowest P/E within (0, 100) and lowest P/B within (0, 10).
run('低本益比', topN(pe, n=30, largest=False, lo=0, hi=100))
run('低股價淨值比', topN(pb, n=30, largest=False, lo=0, hi=10))

# Quality: highest ROE.
run('高ROE', topN(roe, n=30, largest=True))

# Price momentum: highest 60-row price change.
# NOTE(review): this uses the unadjusted `close`, so ex-dividend / corporate
# action days count as returns — confirm that `adj` was not intended.
run('價格動能', topN(close / close.shift(60) - 1, n=30, largest=True))

# Revenue momentum: highest positive year-over-year monthly revenue change.
run('營收動能', topN(rev, n=30, largest=True, lo=0))

# Low-volatility single factor: taking the lowest-volatility names from the
# whole market picks stagnant full-delivery / illiquid stocks (whose
# volatility is understated), so the same liquidity filter as the composite
# strategy is applied (top 50% by trailing 60-day turnover).
run(
    '低波動(過濾)',
    topN(-close.pct_change().rolling(60).std(), n=30, largest=True, liq=True),
)

# Benchmark line for comparison.
print(
    f"\n0050(含息) CAGR={cagr0050:.4f} Sharpe={sharpe0050:.4f} "
    f"月Sortino={sortino_m0050:.4f} MDD={mdd0050:.4f}"
)
