"""
Yang-Zhang-s-Realized-Volatility-Automated-Estimation-in-Python
https://github.com/hugogobato/Yang-Zhang-s-Realized-Volatility-Automated-Estimation-in-Python
论文地址：https://www.jstor.org/stable/10.1086/209650
"""

import warnings

import numpy as np
import pandas as pd


def rv_from_stock_zh_a_hist_min_em(
    symbol="000001",
    start_date="2021-10-20 09:30:00",
    end_date="2024-11-01 15:00:00",
    period="1",
    adjust="hfq",
) -> pd.DataFrame:
    """
    从东方财富网获取股票的分钟级历史行情数据,并进行数据清洗和格式化为计算 yz 已实现波动率所需的数据格式
    https://quote.eastmoney.com/concept/sh603777.html?from=classic
    :param symbol: 股票代码,如"000001"
    :type symbol: str
    :param start_date: 开始日期时间,格式"YYYY-MM-DD HH:MM:SS"
    :type start_date: str
    :param end_date: 结束日期时间,格式"YYYY-MM-DD HH:MM:SS"
    :type end_date: str
    :param period: 时间周期,可选{'1','5','15','30','60'}分钟
    :type period: str
    :param adjust: 复权方式,可选{'','qfq'(前复权),'hfq'(后复权)}
    :type adjust: str
    :return: 整理后的分钟行情数据,包含Date(索引),Open,High,Low,Close列
    :rtype: pandas.DataFrame
    """
    from akshare.stock_feature.stock_hist_em import stock_zh_a_hist_min_em

    temp_df = stock_zh_a_hist_min_em(
        symbol=symbol,
        start_date=start_date,
        end_date=end_date,
        period=period,
        adjust=adjust,
    )
    temp_df.rename(
        columns={
            "时间": "Date",
            "开盘": "Open",
            "最高": "High",
            "最低": "Low",
            "收盘": "Close",
        },
        inplace=True,
    )
    temp_df = temp_df[temp_df["Open"] != 0]
    temp_df["Date"] = pd.to_datetime(temp_df["Date"])
    temp_df.set_index(keys="Date", inplace=True)
    return temp_df


def rv_from_futures_zh_minute_sina(
    symbol: str = "IF2008", period: str = "5"
) -> pd.DataFrame:
    """
    从新浪财经获取期货的分钟级历史行情数据,并进行数据清洗和格式化
    https://vip.stock.finance.sina.com.cn/quotes_service/view/qihuohangqing.html#titlePos_3
    :param symbol: 期货合约代码,如"IF2008"代表沪深300期货2020年8月合约
    :type symbol: str
    :param period: 时间周期,可选{'1','5','15','30','60'}分钟
    :type period: str
    :return: 整理后的分钟行情数据,包含Date(索引),Open,High,Low,Close列
    :rtype: pandas.DataFrame
    """
    from akshare.futures.futures_zh_sina import futures_zh_minute_sina

    temp_df = futures_zh_minute_sina(symbol=symbol, period=period)
    temp_df.rename(
        columns={
            "datetime": "Date",
            "open": "Open",
            "high": "High",
            "low": "Low",
            "close": "Close",
        },
        inplace=True,
    )
    temp_df["Date"] = pd.to_datetime(temp_df["Date"])
    temp_df.set_index(keys="Date", inplace=True)
    return temp_df


def volatility_yz_rv(data: pd.DataFrame) -> pd.DataFrame:
    (
        """
    波动率-已实现波动率-Yang-Zhang 已实现波动率(Yang-Zhang Realized Volatility)
    https://github.com/hugogobato/Yang-Zhang-s-Realized-Volatility-Automated-Estimation-in-Python
    论文地址：https://www.jstor.org/stable/10.1086/209650
    基于以下公式计算:
    RV^2 = Vo + k*Vc + (1-k)*Vrs
    其中:
    - Vo: 隔夜波动率, Vo = 1/(n-1)*sum(Oi-Obar)^2
        Oi为标准化开盘价, Obar为标准化开盘价均值
    - Vc: 收盘波动率, Vc = 1/(n-1)*sum(ci-Cbar)^2
        ci为标准化收盘价, Cbar为标准化收盘价均值
    - k: 权重系数, k = 0.34/(1.34+(n+1)/(n-1))
        n为样本数量
    - Vrs: Rogers-Satchell波动率代理, Vrs = ui(ui-ci)+di(di-ci)
        ui = ln(Hi/Oi), ci = ln(Ci/Oi), di = ln(Li/Oi), oi = ln(Oi/Ci-1)
        Hi/Li/Ci/Oi分别为最高价/最低价/收盘价/开盘价

    :param data: 包含 OHLC(开高低收) 价格的 pandas.DataFrame
    :type data: pandas.DataFrame
    :return: 包含 Yang-Zhang 实现波动率的 pandas.DataFrame
    :rtype: pandas.DataFrame

    要求输入数据包含以下列:
    - Open: 开盘价
    - High: 最高价
    - Low: 最低价
    - Close: 收盘价
    # yang_zhang_rv formula is give as:
    # RV^2 = Vo + k*Vc + (1-k)*Vrs
    # where Vo = 1/(n-1)*sum(Oi-Obar)^2
    # with oi = normalized opening price at time t and Obar = mean of normalized opening prices
    # Vc = = 1/(n-1)*sum(ci-Cbar)^2
    # with ci = normalized close price at time t and Cbar = mean of normalized close prices
    # k = 0.34/(1.34+(n+1)/(n-1))
    # with n = total number of days or time periods considered
    # Vrs (Rogers & Satchell RV proxy) = ui(ui-ci)+di(di-ci)
    # with ui = ln(Hi/Oi), ci = ln(Ci/Oi), di=(Li/Oi), oi = ln(Oi/Ci-1)
    # where Hi = high price at time t and Li = low price at time t
    """
        ""
    )
    warnings.filterwarnings("ignore")

    data["ui"] = np.log(np.divide(data["High"][1:], data["Open"][1:]))
    data["ci"] = np.log(np.divide(data["Close"][1:], data["Open"][1:]))
    data["di"] = np.log(np.divide(data["Low"][1:], data["Open"][1:]))
    data["oi"] = np.log(np.divide(data["Open"][1:], data["Close"][: len(data) - 1]))
    data = data[1:]
    data["RS"] = data["ui"] * (data["ui"] - data["ci"]) + data["di"] * (
        data["di"] - data["ci"]
    )
    rs_var = data["RS"].groupby(pd.Grouper(freq="1D")).mean().dropna()
    vc_and_vo = data[["oi", "ci"]].groupby(pd.Grouper(freq="1D")).var().dropna()
    n = int(len(data) / len(rs_var))
    k = 0.34 / (1.34 + (n + 1) / (n - 1))
    yang_zhang_rv = np.sqrt((1 - k) * rs_var + vc_and_vo["oi"] + vc_and_vo["ci"] * k)
    yang_zhang_rv_df = pd.DataFrame(yang_zhang_rv)
    yang_zhang_rv_df.rename(columns={0: "yz_rv"}, inplace=True)
    yang_zhang_rv_df.reset_index(inplace=True)
    yang_zhang_rv_df.columns = ["date", "rv"]
    yang_zhang_rv_df["date"] = pd.to_datetime(
        yang_zhang_rv_df["date"], errors="coerce"
    ).dt.date
    return yang_zhang_rv_df


if __name__ == "__main__":
    futures_df = rv_from_futures_zh_minute_sina(symbol="IF2008", period="1")
    volatility_yz_rv_df = volatility_yz_rv(data=futures_df)
    print(volatility_yz_rv_df)

    stock_df = rv_from_stock_zh_a_hist_min_em(
        symbol="000001",
        start_date="2021-10-20 09:30:00",
        end_date="2024-11-01 15:00:00",
        period="5",
        adjust="",
    )
    volatility_yz_rv_df = volatility_yz_rv(data=stock_df)
    print(volatility_yz_rv_df)
