#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Hong Kong market financial-report data fetching script, V2
(hong_kong_market.py).

Downloads the Hong Kong spot quote list through akshare, fetches the
financial-indicator table for every listed code, derives a fixed set of
valuation / growth columns and writes them to a CSV file under
``./daily_report``.
"""

import akshare as ak
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
import time
from tqdm import tqdm
import os

warnings.filterwarnings("ignore")

# Project helpers. This module relies on the star import for at least
# `to_yi_round2` and `current_date`, neither of which is defined in this file.
from src.utils import *


# Output schema of the CSV, in the exact column order it is written.
_REQUIRED_COLUMNS = [
    "股票名称", "代码", "行业", "总市值", "当前价格", "总股本",
    "扣非净利润-TTM", "扣非净利润同比-TTM", "扣非利润年复合同比",
    "扣非净利润增速-TTM", "当季扣非净利润同比", "当季扣非净利润增速",
    "营业总收入-TTM", "营业总收入同比-TTM", "营业总收入年复合同比",
    "营业总收入增速-TTM", "当季营业总收入同比", "当季营业总收入增速",
    "当季毛利率", "当季毛利率同比", "当季毛利率同比增速",
    "自由现金流", "自由现金流复合增长率", "总负债", "总资产",
    "少数股东损益", "当季三费同比", "商誉", "商誉占比",
    "市净率", "市销率", "市盈率-TTM", "市盈率-TTM(财报)",
    "ROE", "ROE_qualified-TTM", "每股分红", "股息率", "分红率",
    "每股收益", "每股收益-占比", "每股FCF", "每股FCF-占比",
    "每股资产", "每股资产-占比", "每股负债", "每股负债-占比",
]

# Columns holding fractions that are rendered as "12.34%" strings on output.
_PERCENTAGE_COLUMNS = [
    "扣非净利润同比-TTM", "扣非利润年复合同比", "扣非净利润增速-TTM",
    "当季扣非净利润同比", "当季扣非净利润增速", "营业总收入同比-TTM",
    "营业总收入年复合同比", "营业总收入增速-TTM", "当季营业总收入同比",
    "当季营业总收入增速", "当季毛利率", "当季毛利率同比",
    "当季毛利率同比增速", "自由现金流复合增长率", "当季三费同比",
    "商誉占比", "股息率", "分红率", "每股收益-占比", "每股FCF-占比",
    "每股资产-占比", "每股负债-占比",
]

# Pause for one second after every this many processed stocks.
_THROTTLE_EVERY = 10


def get_hk_stock_list():
    """Fetch the Hong Kong spot quote table and keep the usable rows.

    A row is kept when its code is a 5-character string starting with '0'
    (the original author's rule for main-board listings) and its latest
    price is a number greater than zero.

    Returns:
        The filtered DataFrame, or an empty DataFrame if anything fails.
    """
    try:
        print("正在获取香港股票列表...")
        hk_spot = ak.stock_hk_spot_em().copy()

        # Coerce prices so a non-numeric cell becomes NaN instead of making
        # the comparison below raise and discarding the whole list.
        hk_spot['最新价'] = pd.to_numeric(hk_spot['最新价'], errors='coerce')

        codes = hk_spot['代码']
        code_ok = (codes.str.len() == 5) & codes.str.startswith('0', na=False)
        # NaN > 0 is False, so missing prices are dropped by the same test.
        price_ok = hk_spot['最新价'] > 0
        hk_spot = hk_spot[code_ok & price_ok]

        print(f"获取到 {len(hk_spot)} 只有效香港股票")
        return hk_spot
    except Exception as e:
        print(f"获取香港股票列表失败: {e}")
        return pd.DataFrame()


def get_hk_financial_indicators(code):
    """Fetch the financial-indicator table for one Hong Kong stock code.

    This is a best-effort call: any failure is reported on stdout and an
    empty DataFrame is returned so the caller can skip the stock.

    Args:
        code: Stock code passed to akshare as ``symbol``.

    Returns:
        The indicator DataFrame, or an empty DataFrame on failure.
    """
    try:
        financial_data = ak.stock_financial_hk_analysis_indicator_em(
            symbol=code,
            indicator="按报告期"
        )
    except Exception as e:
        print(f"{code} 获取财务指标失败: {e}")
        return pd.DataFrame()

    # Callers test `.empty`, so never hand back None or another type.
    if not isinstance(financial_data, pd.DataFrame):
        return pd.DataFrame()
    return financial_data


def safe_float(value, default=0):
    """Convert ``value`` to float, returning ``default`` when it is unusable.

    Unusable means: None, NaN / NA, a placeholder such as '-', or anything
    else ``float()`` rejects.

    Args:
        value: Any scalar.
        default: Value returned when conversion is not possible.

    Returns:
        The converted float, or ``default``.
    """
    if value is None:
        return default
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # Covers '-', '', pd.NA, non-numeric text and oversized integers.
        return default
    # float('nan') and the string "nan" both land here.
    return default if pd.isna(number) else number


def calculate_growth_rate(current, previous):
    """Return the relative change from ``previous`` to ``current``.

    The result is a fraction (0.25 means +25%). The absolute value of the
    base is used so the sign of the result follows the direction of change
    even when the base is negative. A zero or missing base yields 0.
    """
    if previous == 0 or pd.isna(previous):
        return 0
    return (current - previous) / abs(previous)


def _year_over_year(latest_value, earlier_row, column):
    """Growth of ``latest_value`` against ``column`` of ``earlier_row``; None if the base is 0."""
    base = safe_float(earlier_row.get(column, 0))
    if base == 0:
        return None
    return calculate_growth_rate(latest_value, base)


def process_hk_stock(code, hk_spot_info):
    """Build the output record for a single Hong Kong stock.

    Args:
        code: Stock code to process.
        hk_spot_info: Spot quote DataFrame with at least the columns
            '代码', '名称' and '最新价' ('成交量' is optional).

    Returns:
        A dict keyed by the output column names, or None when the stock has
        no quote, no valid price, no financial data, or processing fails.
    """
    try:
        stock_info = hk_spot_info[hk_spot_info['代码'] == code]
        if stock_info.empty:
            print(f"{code} 未找到实时信息")
            return None

        name = stock_info['名称'].values[0]
        current_price = stock_info['最新价'].values[0]
        # A missing volume is treated as 0 so it cannot inject NaN below.
        volume = safe_float(stock_info['成交量'].values[0]) if '成交量' in stock_info.columns else 0

        if pd.isna(current_price) or current_price == 0:
            print(f"{code} 无有效价格")
            return None

        financial_data = get_hk_financial_indicators(code)
        if financial_data.empty:
            print(f"{code} 无财务数据")
            return None

        # NOTE(review): assumes the first row is the most recent report and
        # that the column names read below exist in the akshare output;
        # missing columns silently fall back to 0 - confirm against akshare.
        latest_data = financial_data.iloc[0]

        eps = safe_float(latest_data.get('BASIC_EPS', 0))
        bps = safe_float(latest_data.get('BPS', 0))
        cfps = safe_float(latest_data.get('PER_NETCASH_OPERATE', 0))

        # Ratios are only meaningful for a positive denominator; 0 marks "n/a".
        pe_ratio = current_price / eps if eps > 0 else 0
        pb_ratio = current_price / bps if bps > 0 else 0

        revenue = safe_float(latest_data.get('TOTAL_OPERATE_INCOME', 0))
        deducted_profit = safe_float(latest_data.get('DEDUCTNP', 0))
        total_assets = safe_float(latest_data.get('TOT_ASSETS', 0))
        total_liabilities = safe_float(latest_data.get('TOT_LIAB', 0))

        def share_of_price(per_share_value):
            """Express a per-share figure as a fraction of the current price."""
            return per_share_value / current_price if current_price > 0 else 0

        # Book value per share scaled by the liabilities-to-assets ratio.
        liabilities_per_share = (
            bps * (total_liabilities / total_assets) if total_assets > 0 else 0
        )

        result = {
            "股票名称": name,
            "代码": code,
            "行业": "港股",  # fixed placeholder for now
            "总市值": 0,  # not computed yet
            "当前价格": current_price,
            "总股本": 0,  # must come from another data source
            "扣非净利润-TTM": to_yi_round2(deducted_profit) if deducted_profit else 0,
            "扣非净利润同比-TTM": 0,  # filled in below when history exists
            "扣非利润年复合同比": 0,
            "扣非净利润增速-TTM": 0,
            "当季扣非净利润同比": 0,
            "当季扣非净利润增速": 0,
            "营业总收入-TTM": to_yi_round2(revenue) if revenue else 0,
            "营业总收入同比-TTM": 0,
            "营业总收入年复合同比": 0,
            "营业总收入增速-TTM": 0,
            "当季营业总收入同比": 0,
            "当季营业总收入增速": 0,
            "当季毛利率": 0,
            "当季毛利率同比": 0,
            "当季毛利率同比增速": 0,
            # NOTE(review): product of two per-share figures, not an actual
            # free-cash-flow amount - needs a share count to be meaningful.
            "自由现金流": to_yi_round2(cfps * bps) if cfps and bps else 0,
            "自由现金流复合增长率": 0,
            "总负债": to_yi_round2(total_liabilities),
            "总资产": to_yi_round2(total_assets),
            "少数股东损益": 0,
            "当季三费同比": 0,
            "商誉": 0,
            "商誉占比": 0,
            "市净率": pb_ratio,
            # NOTE(review): uses traded volume in place of shares outstanding,
            # so this is not a true price-to-sales ratio; kept until a share
            # count is available.
            "市销率": current_price * volume / revenue if revenue > 0 else 0,
            "市盈率-TTM": pe_ratio,
            "市盈率-TTM(财报)": pe_ratio,
            "ROE": safe_float(latest_data.get('AVG_ROE', 0)),
            "ROE_qualified-TTM": safe_float(latest_data.get('DILUTED_ROE', 0)),
            "每股分红": 0,
            "股息率": 0,
            "分红率": 0,
            "每股收益": eps,
            "每股收益-占比": share_of_price(eps),
            "每股FCF": cfps,
            "每股FCF-占比": share_of_price(cfps),
            "每股资产": bps,
            "每股资产-占比": share_of_price(bps),
            "每股负债": liabilities_per_share,
            "每股负债-占比": share_of_price(liabilities_per_share),
        }

        # With more than four reports on file, compare against the fifth row.
        # NOTE(review): treating row 4 as "same period last year" presumes
        # quarterly reporting - confirm for issuers that report half-yearly.
        if len(financial_data) > 4:
            try:
                last_year_data = financial_data.iloc[4]

                profit_growth = _year_over_year(deducted_profit, last_year_data, 'DEDUCTNP')
                if profit_growth is not None:
                    result["扣非净利润同比-TTM"] = profit_growth
                    result["扣非净利润增速-TTM"] = profit_growth

                revenue_growth = _year_over_year(revenue, last_year_data, 'TOTAL_OPERATE_INCOME')
                if revenue_growth is not None:
                    result["营业总收入同比-TTM"] = revenue_growth
                    result["营业总收入增速-TTM"] = revenue_growth
            except Exception as e:
                # Growth figures are optional: keep the record, report why.
                print(f"{code} 同比计算失败: {e}")

        return result

    except Exception as e:
        print(f"处理 {code} 时出错: {e}")
        return None


def _format_percent(value):
    """Render a fraction as a two-decimal percentage string; missing becomes "0.00%"."""
    return f"{value*100:.2f}%" if pd.notna(value) else "0.00%"


def main():
    """Fetch, process and export financial data for all listed HK stocks.

    Writes ``./daily_report/hk_stock_data_{current_date}.csv`` where
    ``current_date`` is expected to be provided by ``src.utils``.
    """
    try:
        os.makedirs("./daily_report", exist_ok=True)

        hk_stocks = get_hk_stock_list()
        if hk_stocks.empty:
            print("无法获取港股列表")
            return

        # For a quick test run, truncate the list here, e.g. to the first 50 rows.

        results = []
        failed_count = 0

        progress = tqdm(hk_stocks['代码'], total=len(hk_stocks), desc="处理港股数据")
        # Count loop positions explicitly: the DataFrame index keeps the
        # labels of the unfiltered quote table and is not a usable counter.
        for position, code in enumerate(progress, start=1):
            result = process_hk_stock(code, hk_stocks)

            if result:
                results.append(result)
            else:
                failed_count += 1

            # Pause periodically to avoid sending requests too quickly.
            if position % _THROTTLE_EVERY == 0:
                time.sleep(1)

        if not results:
            print("没有成功处理任何股票数据")
            return

        df_results = pd.DataFrame(results)

        # Guarantee the full schema, then enforce the column order.
        for col in _REQUIRED_COLUMNS:
            if col not in df_results.columns:
                df_results[col] = 0
        df_results = df_results[_REQUIRED_COLUMNS]

        for col in _PERCENTAGE_COLUMNS:
            df_results[col] = df_results[col].apply(_format_percent)

        save_path = f"./daily_report/hk_stock_data_{current_date}.csv"
        # utf-8-sig writes a BOM, which spreadsheet tools commonly rely on
        # to detect UTF-8 in CSV files.
        df_results.to_csv(save_path, index=False, encoding='utf-8-sig')

        print("\n处理完成!")
        print(f"成功处理: {len(results)} 只股票")
        print(f"失败处理: {failed_count} 只股票")
        print(f"数据已保存到: {save_path}")

    except Exception as e:
        # Top-level boundary: report the failure with a traceback.
        print(f"程序执行出错: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()