Administrator
2025-05-27 e6cbb6c96b6576da5eaf089cfa3dd6098cc6baf9
data_parser/transaction_big_order_parser.py
@@ -194,9 +194,60 @@
    ))
def extract_big_order_of_code(dir_path, code):
def extract_big_order_of_code(dir_path, code=None, min_amount=500000):
    """
    Extract big buy orders from the combined transaction data and save them as CSV.

    Reads ``combined.csv`` from ``dir_path``, optionally keeps only the rows of a
    single security, merges all rows that share the same (SecurityID, BuyNo) pair
    into one order, and writes the orders whose summed ``TotalAmount`` is strictly
    greater than ``min_amount`` to ``big_buy_<code>.csv`` (single security) or
    ``big_buy.csv`` (all securities) inside ``dir_path``.

    @param dir_path: data directory that contains ``combined.csv``; the result
        file is written to the same directory
    @param code: security code to export; a falsy value (None / empty) exports
        every security. It is converted with ``int()`` before being compared
        with the ``SecurityID`` column.
    @param min_amount: orders are kept only when their total amount is strictly
        greater than this value (defaults to 500000)
    @return: None. The outcome is reported through printed messages.
    """

    def first_value(series):
        """Return the first row's value of the group (NaN is NOT skipped)."""
        return series.iloc[0]

    def last_value(series):
        """Return the last row's value of the group (NaN is NOT skipped)."""
        return series.iloc[-1]

    combined_path = os.path.join(dir_path, 'combined.csv')
    if not os.path.exists(combined_path):
        print("拼接数据不存在")
        return

    deals = pd.read_csv(combined_path)
    if code:
        deals = deals[deals["SecurityID"] == int(code)]
    if deals.empty:
        print("目标代码对应成交数据为空")
        return

    # ``assign`` builds a new frame, so we never write into a filtered slice of
    # the frame returned by read_csv (avoids chained-assignment warnings).
    # NOTE(review): code_format presumably turns the numeric id back into the
    # textual security code - confirm against BigOrderDealParser.
    deals = deals.assign(
        SecurityID=deals["SecurityID"].apply(BigOrderDealParser.code_format)
    )

    # Group by SecurityID and BuyNo. Named aggregation with as_index=False
    # takes the key columns from the group keys, so the callbacks never touch
    # the grouping columns (groupby.apply on grouping columns is deprecated).
    # The positional helpers are used instead of the built-in 'first'/'last'
    # reducers because those skip missing values.
    orders = deals.groupby(['SecurityID', 'BuyNo'], as_index=False).agg(
        TotalVolume=('TotalVolume', 'sum'),
        TotalAmount=('TotalAmount', 'sum'),
        EndTime=('EndTime', last_value),
        EndPrice=('EndPrice', last_value),
        StartTime=('StartTime', first_value),
        StartPrice=('StartPrice', first_value),
    )
    big_orders = orders[orders["TotalAmount"] > min_amount]

    file_name = f"big_buy_{code}.csv" if code else "big_buy.csv"
    big_orders.to_csv(os.path.join(dir_path, file_name), index=False)
    print("保存成功")
def extract_big_order_codes(dir_path):
    """
    导出大单代码
    @param dir_path: 数据目录
    @param code:
    @return:
    """
@@ -208,12 +259,6 @@
            @return:
            """
        return pd.Series({
            'TotalAmount': group['TotalAmount'].sum(),
            'TotalVolume': group['TotalVolume'].sum(),
            'StartTime': group['StartTime'].iloc[0],
            'StartPrice': group['StartPrice'].iloc[0],
            'EndTime': group['EndTime'].iloc[-1],
            'EndPrice': group['EndPrice'].iloc[-1]
        })
    combined_path = os.path.join(dir_path, 'combined.csv')
@@ -222,21 +267,18 @@
        return
    df = pd.read_csv(combined_path)
    df_copy = df.copy()
    df_copy = df_copy[df_copy["SecurityID"] == int(code)]
    if df_copy.empty:
        print("目标代码对应成交数据为空")
        return
    df_copy["SecurityID"] = df_copy["SecurityID"].apply(BigOrderDealParser.code_format)
    # 按SecurityID和BuyNo分组
    grouped = df_copy.groupby(['SecurityID', 'BuyNo'])
    grouped_result = grouped.apply(first_last)
    grouped_result = grouped_result[grouped_result["TotalAmount"] > 500000]
    grouped_result.to_csv(os.path.join(dir_path, f"{code}.csv"))
    print("保存成功")
    grouped = df_copy.groupby(['SecurityID'])
    return set(grouped.groups.keys())
# Manual driver: runs only when this module is executed as a script.
# The paths below are hard-coded local test-data locations.
if __name__ == "__main__":
    # Earlier pipeline steps, currently disabled:
    # pre_process_transactions("E:/测试数据/Transaction_Test.csv")
    # pre_process_ngtsticks("E:/测试数据/NGTSTick_Test.csv")
    # concat_pre_transactions("E:/测试数据/Transaction_Test")
    # Export the big buy orders of a single security (code "000017").
    extract_big_order_of_code("E:/测试数据/Transaction_Test", "000017")
    # extract_big_order_codes("E:/测试数据/Transaction_Test")
    # No code given: export the big buy orders of every security.
    extract_big_order_of_code("E:/测试数据/Transaction_Test")