Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
306 changes: 306 additions & 0 deletions hong_kong_market.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,306 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
香港市场财报数据获取脚本 V2
"""

import akshare as ak
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
import time
from tqdm import tqdm
import os

warnings.filterwarnings("ignore")

# 导入工具函数
from src.utils import *


def get_hk_stock_list():
    """Fetch the Hong Kong spot-quote table and keep the rows this script uses.

    Returns:
        The quote DataFrame restricted to rows whose code is five characters
        long, starts with '0', and whose latest price is positive. An empty
        DataFrame is returned if anything in the fetch/filter chain fails.

    NOTE(review): the leading-'0' rule is meant to select main-board codes;
    GEM codes presumably also start with '0' (08xxx) — confirm whether they
    should be excluded explicitly.
    """
    try:
        print("正在获取香港股票列表...")
        quotes = ak.stock_hk_spot_em()

        # Code-shape filters, applied one after the other so the second mask
        # is only built from rows that already passed the length test.
        has_five_chars = quotes['代码'].str.len() == 5
        quotes = quotes.loc[has_five_chars]
        has_leading_zero = quotes['代码'].str.startswith('0')
        quotes = quotes.loc[has_leading_zero]

        # Price filters: strictly positive, then not missing.
        is_positive = quotes['最新价'] > 0
        quotes = quotes.loc[is_positive]
        quotes = quotes.loc[quotes['最新价'].notna()]

        print(f"获取到 {len(quotes)} 只有效香港股票")
        return quotes
    except Exception as e:
        # Best-effort: callers treat an empty frame as "no list available".
        print(f"获取香港股票列表失败: {e}")
        return pd.DataFrame()


def get_hk_financial_indicators(code):
    """Fetch the financial-indicator table for one Hong Kong stock.

    Args:
        code: Stock code passed to akshare as ``symbol``.

    Returns:
        The DataFrame returned by akshare, or an empty DataFrame when the
        request fails or yields nothing, so callers can rely on ``.empty``.
    """
    try:
        financial_data = ak.stock_financial_hk_analysis_indicator_em(
            symbol=code,
            indicator="按报告期"
        )
    except Exception as exc:
        # Best-effort lookup: report why it failed instead of hiding it,
        # then fall back to the empty frame callers already handle.
        print(f"{code} 获取财务指标失败: {exc}")
        return pd.DataFrame()

    # Guard against a None result so the caller's ``.empty`` check is safe.
    if financial_data is None:
        return pd.DataFrame()
    return financial_data


def safe_float(value, default=0):
    """Convert *value* to ``float``, falling back to *default*.

    Args:
        value: Anything ``float()`` accepts (number, numeric string, ...).
        default: Value returned for missing or unparseable input.

    Returns:
        ``float(value)``, or *default* when *value* is ``None``, NaN/NA, the
        placeholder string ``'-'``, cannot be converted, or converts to NaN.
    """
    # Checked first so None never reaches the pandas / equality tests.
    if value is None:
        return default
    try:
        if pd.isna(value) or value == '-':
            return default
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type; ValueError: unparseable text or an
        # array-like with an ambiguous truth value; OverflowError: huge int.
        return default
    # Text such as "nan" converts without error but still means "missing".
    return default if pd.isna(number) else number


def calculate_growth_rate(current, previous):
    """Return the relative change from *previous* to *current*.

    Args:
        current: Value for the current period.
        previous: Value for the comparison period.

    Returns:
        ``(current - previous) / abs(previous)``; ``0`` when either value is
        missing (``None``/NaN) or *previous* is zero, so the result is always
        a usable number.
    """
    # Missing inputs are tested before any arithmetic or comparison.
    if pd.isna(current) or pd.isna(previous) or previous == 0:
        return 0
    # abs() keeps the sign meaningful when the base period was negative.
    return (current - previous) / abs(previous)


def process_hk_stock(code, hk_spot_info):
    """Build the report row for a single Hong Kong stock.

    Args:
        code: Stock code to look up in the ``'代码'`` column.
        hk_spot_info: Spot-quote DataFrame. ``'代码'``, ``'名称'`` and
            ``'最新价'`` are read; ``'成交量'`` is used when present.

    Returns:
        A dict keyed by report column name, or ``None`` when the code has no
        quote row, no usable price, no financial data, or an unexpected
        error occurs (the reason is printed).
    """
    try:
        # Locate the quote row for this code.
        stock_info = hk_spot_info[hk_spot_info['代码'] == code]
        if stock_info.empty:
            print(f"{code} 未找到实时信息")
            return None

        # Basic quote fields.
        name = stock_info['名称'].values[0]
        current_price = stock_info['最新价'].values[0]
        volume = stock_info['成交量'].values[0] if '成交量' in stock_info.columns else 0

        # Skip rows without a usable price.
        if pd.isna(current_price) or current_price == 0:
            print(f"{code} 无有效价格")
            return None

        financial_data = get_hk_financial_indicators(code)

        if financial_data.empty:
            print(f"{code} 无财务数据")
            return None

        # NOTE(review): row 0 is treated as the most recent report —
        # confirm the ordering returned by the data source.
        latest_data = financial_data.iloc[0]

        # Fields with no data source yet are emitted as fixed placeholders.
        result = {
            "股票名称": name,
            "代码": code,
            "行业": "港股",  # fixed placeholder for now
            "总市值": 0,  # not computed yet
            "当前价格": current_price,
            "总股本": 0,  # needs another data source
        }

        # NOTE(review): the column keys below are looked up with .get() and
        # silently become 0 when absent — verify them against the columns the
        # indicator table really provides.
        eps = safe_float(latest_data.get('BASIC_EPS', 0))  # earnings per share
        bps = safe_float(latest_data.get('BPS', 0))  # book value per share
        cfps = safe_float(latest_data.get('PER_NETCASH_OPERATE', 0))  # operating cash flow per share

        # Valuation ratios are only defined for positive denominators.
        pe_ratio = current_price / eps if eps > 0 else 0
        pb_ratio = current_price / bps if bps > 0 else 0

        revenue = safe_float(latest_data.get('TOTAL_OPERATE_INCOME', 0))
        deducted_profit = safe_float(latest_data.get('DEDUCTNP', 0))

        total_assets = safe_float(latest_data.get('TOT_ASSETS', 0))
        total_liabilities = safe_float(latest_data.get('TOT_LIAB', 0))

        # Book value per share scaled by the liabilities/assets ratio;
        # computed once and reused for both liability columns.
        liability_per_share = (
            bps * (total_liabilities / total_assets) if total_assets > 0 else 0
        )

        result.update({
            "扣非净利润-TTM": to_yi_round2(deducted_profit) if deducted_profit else 0,
            "扣非净利润同比-TTM": 0,  # filled in below when history is available
            "扣非利润年复合同比": 0,
            "扣非净利润增速-TTM": 0,
            "当季扣非净利润同比": 0,
            "当季扣非净利润增速": 0,
            "营业总收入-TTM": to_yi_round2(revenue) if revenue else 0,
            "营业总收入同比-TTM": 0,
            "营业总收入年复合同比": 0,
            "营业总收入增速-TTM": 0,
            "当季营业总收入同比": 0,
            "当季营业总收入增速": 0,
            "当季毛利率": 0,
            "当季毛利率同比": 0,
            "当季毛利率同比增速": 0,
            # NOTE(review): product of two per-share figures — confirm this
            # is the intended free-cash-flow proxy.
            "自由现金流": to_yi_round2(cfps * bps) if cfps and bps else 0,
            "自由现金流复合增长率": 0,
            "总负债": to_yi_round2(total_liabilities),
            "总资产": to_yi_round2(total_assets),
            "少数股东损益": 0,
            "当季三费同比": 0,
            "商誉": 0,
            "商誉占比": 0,
            "市净率": pb_ratio,
            # NOTE(review): uses the traded-volume column, not a share
            # count — confirm this is the intended price/sales figure.
            "市销率": current_price * volume / revenue if revenue > 0 else 0,
            "市盈率-TTM": pe_ratio,
            "市盈率-TTM(财报)": pe_ratio,
            "ROE": safe_float(latest_data.get('AVG_ROE', 0)),
            "ROE_qualified-TTM": safe_float(latest_data.get('DILUTED_ROE', 0)),
            "每股分红": 0,
            "股息率": 0,
            "分红率": 0,
            "每股收益": eps,
            "每股收益-占比": eps / current_price if current_price > 0 else 0,
            "每股FCF": cfps,
            "每股FCF-占比": cfps / current_price if current_price > 0 else 0,
            "每股资产": bps,
            "每股资产-占比": bps / current_price if current_price > 0 else 0,
            "每股负债": liability_per_share,
            "每股负债-占比": (
                liability_per_share / current_price
                if total_assets > 0 and current_price > 0
                else 0
            ),
        })

        # Year-over-year growth needs at least five report rows.
        if len(financial_data) > 4:
            try:
                # NOTE(review): row 4 is treated as the same period one year
                # earlier, which presumes four reports per year — confirm.
                last_year_data = financial_data.iloc[4]

                last_deducted_profit = safe_float(last_year_data.get('DEDUCTNP', 0))
                if last_deducted_profit != 0:
                    result["扣非净利润同比-TTM"] = calculate_growth_rate(deducted_profit, last_deducted_profit)
                    result["扣非净利润增速-TTM"] = result["扣非净利润同比-TTM"]

                last_revenue = safe_float(last_year_data.get('TOTAL_OPERATE_INCOME', 0))
                if last_revenue != 0:
                    result["营业总收入同比-TTM"] = calculate_growth_rate(revenue, last_revenue)
                    result["营业总收入增速-TTM"] = result["营业总收入同比-TTM"]
            except Exception as exc:
                # Growth figures are optional: keep the row with its zero
                # placeholders, but say why the calculation was skipped.
                print(f"{code} 计算同比增长失败: {exc}")

        return result

    except Exception as e:
        # One bad stock must not abort the whole batch.
        print(f"处理 {code} 时出错: {e}")
        return None


def main():
    """Fetch every listed Hong Kong stock, build its report row and save a CSV.

    The CSV is written under ``./daily_report``. Any unexpected error is
    printed with its traceback and is not propagated.
    """
    try:
        os.makedirs("./daily_report", exist_ok=True)

        hk_stocks = get_hk_stock_list()
        if hk_stocks.empty:
            print("无法获取港股列表")
            return

        # For quick test runs, limit the universe:
        # hk_stocks = hk_stocks.head(50)

        results = []
        failed_count = 0

        # Count processed rows with enumerate: the DataFrame index labels
        # are left over from the unfiltered quote table, so they cannot be
        # used to pace the requests.
        progress = tqdm(hk_stocks.iterrows(), total=len(hk_stocks), desc="处理港股数据")
        for processed, (_, row) in enumerate(progress, start=1):
            code = row['代码']
            result = process_hk_stock(code, hk_stocks)

            if result:
                results.append(result)
            else:
                failed_count += 1

            # Pause after every 10 stocks to avoid hammering the data source.
            if processed % 10 == 0:
                time.sleep(1)

        if results:
            df_results = pd.DataFrame(results)

            # Output schema, in the order the report expects.
            required_columns = [
                "股票名称", "代码", "行业", "总市值", "当前价格", "总股本",
                "扣非净利润-TTM", "扣非净利润同比-TTM", "扣非利润年复合同比",
                "扣非净利润增速-TTM", "当季扣非净利润同比", "当季扣非净利润增速",
                "营业总收入-TTM", "营业总收入同比-TTM", "营业总收入年复合同比",
                "营业总收入增速-TTM", "当季营业总收入同比", "当季营业总收入增速",
                "当季毛利率", "当季毛利率同比", "当季毛利率同比增速",
                "自由现金流", "自由现金流复合增长率", "总负债", "总资产",
                "少数股东损益", "当季三费同比", "商誉", "商誉占比",
                "市净率", "市销率", "市盈率-TTM", "市盈率-TTM(财报)",
                "ROE", "ROE_qualified-TTM", "每股分红", "股息率", "分红率",
                "每股收益", "每股收益-占比", "每股FCF", "每股FCF-占比",
                "每股资产", "每股资产-占比", "每股负债", "每股负债-占比"
            ]

            # Backfill any column a row did not provide.
            for col in required_columns:
                if col not in df_results.columns:
                    df_results[col] = 0

            df_results = df_results[required_columns]

            # Ratio columns are rendered as percentage strings.
            percentage_columns = [
                "扣非净利润同比-TTM", "扣非利润年复合同比", "扣非净利润增速-TTM",
                "当季扣非净利润同比", "当季扣非净利润增速", "营业总收入同比-TTM",
                "营业总收入年复合同比", "营业总收入增速-TTM", "当季营业总收入同比",
                "当季营业总收入增速", "当季毛利率", "当季毛利率同比",
                "当季毛利率同比增速", "自由现金流复合增长率", "当季三费同比",
                "商誉占比", "股息率", "分红率", "每股收益-占比", "每股FCF-占比",
                "每股资产-占比", "每股负债-占比"
            ]

            for col in percentage_columns:
                df_results[col] = df_results[col].apply(lambda x: f"{x*100:.2f}%" if pd.notna(x) else "0.00%")

            # NOTE(review): current_date is not defined in this function;
            # presumably it comes from `from src.utils import *` — verify.
            save_path = f"./daily_report/hk_stock_data_{current_date}.csv"
            # utf-8-sig writes a BOM in front of the CSV content.
            df_results.to_csv(save_path, index=False, encoding='utf-8-sig')

            print(f"\n处理完成!")
            print(f"成功处理: {len(results)} 只股票")
            print(f"失败处理: {failed_count} 只股票")
            print(f"数据已保存到: {save_path}")

        else:
            print("没有成功处理任何股票数据")

    except Exception as e:
        # Top-level boundary: report the failure with its traceback.
        print(f"程序执行出错: {e}")
        import traceback
        traceback.print_exc()


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()