# Source code for fintie.stock.announcement

# -*- coding: utf-8 -*-
# This file is part of fintie.

# Copyright (C) 2018-present qytz <hhhhhf@foxmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""本模块负责获取上市公司公告文件

信息获取通道:

    post http://www.cninfo.com.cn/cninfo-new/announcement/query

    post form::

        category: category_ndbg_szsh;category_bndbg_szsh;
        column:
        limit:
        pageNum: 1
        pageSize: 30
        searchkey: // 搜索关键字
        seDate: // 查询日期: 2018-07-28 或者 2018-03-13+~+2018-07-28
        sortName:
        sortType:
        stock: 300121
        tabName: fulltext

    category 类型

        * name="category_ndbg_szsh" title="年度报告"
        * name="category_bndbg_szsh" title="半年度报告"
        * name="category_yjdbg_szsh" title="一季度报告"
        * name="category_sjdbg_szsh" title="三季度报告"
        * name="category_scgkfx_szsh" title="首次公开发行及上市"
        * name="category_pg_szsh" title="配股"
        * name="category_zf_szsh" title="增发"
        * name="category_kzhz_szsh" title="可转换债券"
        * name="category_qzxg_szsh" title="权证相关公告"
        * name="category_qtrz_szsh" title="其他融资"
        * name="category_qyfpxzcs_szsh" title="权益及限制出售股份"
        * name="category_gqbd_szsh" title="股权变动"
        * name="category_jy_szsh" title="交易"
        * name="category_gddh_szsh" title="股东大会"
        * name="category_cqfxyj_szsh" title="澄清风险业绩预告"
        * name="category_tbclts_szsh" title="特别处理和退市"
        * name="category_bcgz_szsh" title="补充及更正"
        * name="category_zjjg_szsh" title="中介机构报告"
        * name="category_ssgszd_szsh" title="上市公司制度"
        * name="category_zqgg_szsh" title="债券公告"
        * name="category_qtzdsx_szsh" title="其它重大事项"
        * name="category_tzzgx_szsh" title="投资者关系信息"
        * name="category_dshgg_szsh" title="董事会公告"
        * name="category_jshgg_szsh" title="监事会公告"

加载已保存的数据::

    import json

    from pathlib import Path
    with Path("xxx.json").open(encoding="utf-8") as f:
        data = json.load(f)
"""
import os
import json
import asyncio
import logging
from pathlib import Path
from datetime import datetime, date
from dateutil.relativedelta import relativedelta

import click
import aiohttp

from .cli import stock_cli_group, MODULE_DATA_DIR
from ..utils import parse_dt, fetch_http_data, add_doc


# Name the logger after the module's dotted import path rather than its file
# path: a file path contains dots and separators that produce a meaningless
# logger hierarchy and keep this logger from inheriting configuration applied
# to the parent package logger.
logger = logging.getLogger(__name__)
# Public API of this module: the synchronous and the asynchronous entry point.
__all__ = ["get_announcements", "async_get_announcements"]


async def _init(session):
    """Per-session initialisation hook; currently a no-op.

    :param session: session object handed in by the caller; it is not used here
    :returns: always ``True``
    """
    initialised = True
    return initialised


async def _get_one_announcement(session, url, fname):
    """Download a single announcement file and store it on disk.

    :param session: client session used to issue the GET request; the object
        returned by ``session.get(url)`` is used as an async context manager
    :param url: address of the announcement file
    :param fname: path of the file the raw response body is written to
    :returns: ``True`` once the file has been written, ``None`` when the
        server answered with a non-200 status or the request timed out
    """
    try:
        # ``async with`` releases the connection on every exit path,
        # including the early return for a non-200 response.
        async with session.get(url) as resp:
            if resp.status != 200:
                logger.warning(
                    "Download announcement %s from url failed: http %s",
                    url,
                    resp.status,
                )
                return None
            raw_data = await resp.read()
    # The second entry used to be ``asyncio.TimerHandle``, which is not an
    # exception class: as soon as anything was raised here, evaluating the
    # ``except`` clause itself failed with TypeError.
    except (aiohttp.ServerTimeoutError, asyncio.TimeoutError):
        logger.warning("Download announcement %s failed:timeout", fname)
        return None
    with Path(fname).open("wb") as statf:
        statf.write(raw_data)
    return True


async def async_get_announcements(
    session,
    symbol,
    categories,
    data_path,
    start_date=None,
    end_date=None,
    search_key="",
):
    """Download the announcement files of a listed company.

    The announcement metadata is queried page by page, saved as
    ``<symbol>_meta.json`` and every announcement file is then downloaded
    into ``<data_path>/<MODULE_DATA_DIR>/<symbol>/announcements``.

    :param session: `aiohttp.ClientSession` object; the synchronous interface
        does not need it
    :param symbol: stock code; a leading ``SZ``/``SH`` prefix is stripped
        before it is sent to the query endpoint
    :param categories: iterable of announcement category codes (see below)
    :param data_path: root directory the data is saved under
    :param start_date: start of the announcement date range
    :param end_date: end of the announcement date range; the range is only
        applied when both dates are given
    :param search_key: search keyword for the announcement query

    :returns: None. The interface downloads the original announcement files
        for manual analysis and does not return any data.

    categories::

        all         all categories
        ndbg        annual report
        bndbg       semi-annual report
        yjdbg       first-quarter report
        sjdbg       third-quarter report
        scgkfx      initial public offering and listing
        pg          rights issue
        zf          additional share issue
        kzhz        convertible bonds
        qzxg        warrant-related announcements
        qtrz        other financing
        qyfpxzcs    equity distribution and sale-restricted shares
        gqbd        changes in equity
        jy          transactions
        gddh        shareholders' meeting
        cqfxyj      clarifications, risk warnings and earnings forecasts
        tbclts      special treatment and delisting
        bcgz        supplements and corrections
        zjjg        intermediary agency reports
        ssgszd      listed company rules and policies
        zqgg        bond announcements
        qtzdsx      other major matters
        tzzgx       investor relations information
        dshgg       board of directors announcements
        jshgg       board of supervisors announcements
    """
    await _init(session)

    # NOTE(review): "all" is not special-cased; it is sent as
    # "category_all_szsh" like any other code -- confirm the endpoint
    # accepts that.
    category_str = ";".join(f"category_{cat}_szsh" for cat in categories) + ";"

    date_str = ""
    if start_date and end_date:
        # NOTE(review): the literal "+" characters mirror the form dump in
        # the module docstring; confirm the endpoint expects them verbatim.
        date_str = f"{start_date}+~+{end_date}"

    page_size = 30
    page_cnt = 1
    page_num = 1
    # Upper bound on consecutive timeouts for one page, so an unresponsive
    # server cannot keep the paging loop spinning forever.
    max_retries = 3
    retries = 0

    logger.info("Downloading announcements data for %s", symbol)
    query_url = "http://www.cninfo.com.cn/information/companyinfo_n.html"
    # Visit the company info page before querying (presumably to pick up
    # session cookies -- TODO confirm); the response body is not needed, so
    # the connection is released straight away.
    async with session.get(query_url):
        pass

    post_url = "http://www.cninfo.com.cn/cninfo-new/announcement/query"
    cninfo_symbol = symbol
    if cninfo_symbol.startswith(("SZ", "SH")):
        cninfo_symbol = cninfo_symbol[2:]
    post_form = {
        "category": category_str,
        "column": "",
        "limit": "",
        "pageNum": 1,
        "pageSize": page_size,
        "searchkey": search_key,
        "seDate": date_str,
        "sortName": "",
        "sortType": "",
        "stock": cninfo_symbol,
        "tabName": "fulltext",
    }

    announcements = []
    logger.info("Downloading announcements meta data for %s", symbol)
    while page_num <= page_cnt:
        # Plain integer, matching the initial form value (it used to be
        # wrapped in a one-element list).
        post_form["pageNum"] = page_num
        try:
            async with session.post(post_url, data=post_form) as resp:
                if resp.status != 200:
                    logger.warning(
                        "Download announcement metadata from %s failed: http %s",
                        post_url,
                        resp.status,
                    )
                    break
                data = await resp.json()
        except (aiohttp.ServerTimeoutError, asyncio.TimeoutError) as e:
            logger.warning(
                "Download announcements meta data for %s timeout: %s", symbol, e
            )
            retries += 1
            if retries > max_retries:
                logger.warning(
                    "Download announcements meta data for %s aborted "
                    "after %s consecutive timeouts",
                    symbol,
                    retries,
                )
                break
            # Retry the same page.
            continue
        retries = 0

        if "totalAnnouncement" in data:
            total_cnt = data["totalAnnouncement"]
        elif "totalRecordNum" in data:
            total_cnt = data["totalRecordNum"]
        else:
            logger.warning(
                "Download announcements meta data for %s failed: %s",
                symbol,
                data,
            )
            break

        # Ceiling division by the page size. Dividing by the current page
        # number, as was done before, requested far too many pages for small
        # result sets and stopped early for large ones.
        page_cnt = -(-total_cnt // page_size)
        if "announcements" in data:
            announcements.extend(data["announcements"])
        page_num += 1

    if not announcements:
        logger.warning("no announcements data found for %s", symbol)
        return None
    logger.info("Download announcements meta data for %s finished", symbol)

    symbol_data_dir = Path(data_path) / MODULE_DATA_DIR / symbol / "announcements"
    symbol_data_dir.mkdir(parents=True, exist_ok=True)
    meta_file = symbol_data_dir / f"{symbol}_meta.json"
    with meta_file.open("w", encoding="utf-8") as dataf:
        json.dump(announcements, dataf, indent=4, ensure_ascii=False)

    urls = []
    aws = []
    logger.info("Downloading announcements files for %s", symbol)
    for announcement in announcements:
        adjunct_url = announcement.get("adjunctUrl")
        if not adjunct_url:
            logger.warning("No download url found, skipped: %s", announcement)
            continue

        # File name fallbacks: title, then announcement id, then org id.
        annou_name = (
            announcement.get("announcementTitle")
            or announcement.get("announcementId")
            or announcement.get("orgId")
        )
        if not annou_name:
            logger.warning("No file name found, skipped: %s", announcement)
            continue
        logger.info("Downloading %s from %s", annou_name, adjunct_url)

        # Path separators in a title would point into a non-existent
        # sub-directory and make the write fail.
        safe_name = str(annou_name).replace("/", "_").replace("\\", "_")

        annou_time = ""
        timestamp_ms = announcement.get("announcementTime")
        if timestamp_ms is not None:
            # The value is divided by 1000 before conversion, i.e. it is
            # treated as a millisecond timestamp.
            annou_time = datetime.fromtimestamp(timestamp_ms / 1000).strftime(
                "%Y%m%d%H%M%S"
            )
        ftype = announcement.get("adjunctType", "raw")
        fpath = symbol_data_dir / f"{safe_name}-{annou_time}.{ftype}"
        url = "http://www.cninfo.com.cn/" + adjunct_url
        urls.append(url)
        aws.append(_get_one_announcement(session, url, fpath))

    results = await asyncio.gather(*aws, return_exceptions=True)
    # One failed download must not abort the others, but it should not
    # vanish silently either.
    for url, result in zip(urls, results):
        if isinstance(result, Exception):
            logger.warning("Download announcement %s failed: %r", url, result)
    logger.info("Download announcements files for %s finished", symbol)
    return None
@add_doc(async_get_announcements.__doc__)
def get_announcements(*args, **kwargs):
    # Synchronous front end for ``async_get_announcements``: the coroutine
    # function and all arguments are handed to ``fetch_http_data``.
    # An Exception instance coming back from it is raised here instead of
    # being returned to the caller.
    outcome = fetch_http_data(async_get_announcements, *args, **kwargs)
    if not isinstance(outcome, Exception):
        return outcome
    raise outcome
@click.option("-s", "--symbol", type=str, required=True)
@click.option(
    "-st",
    "--start",
    default=str(date.today().replace(month=1, day=1) - relativedelta(years=3)),
    show_default=True,
)
@click.option("-ed", "--end", default=str(date.today()), show_default=True)
@click.option(
    "-ct", "--category", default="ndbg,bndbg,yjdbg,sjdbg", show_default=True, type=str
)
# The search keyword is free text. It used to be declared with
# ``default=True``, which turned it into a boolean-valued option, and its
# value was never forwarded to the query.
@click.option("-se", "--search", default="", type=str)
@click.option(
    "-f", "--file-path", "save_path", type=click.Path(exists=False)
)
@stock_cli_group.command("announce", short_help="获取公告原文")
@click.pass_context
def announcements_cli(ctx, symbol, start, end, save_path, category, search):
    """Download original announcement files from cninfo.

    symbol: stock code in the format used by xueqiu.com

    announcements category

    \b
    all         all categories
    ndbg        annual report
    bndbg       semi-annual report
    yjdbg       first-quarter report
    sjdbg       third-quarter report
    scgkfx      initial public offering and listing
    pg          rights issue
    zf          additional share issue
    kzhz        convertible bonds
    qzxg        warrant-related announcements
    qtrz        other financing
    qyfpxzcs    equity distribution and sale-restricted shares
    gqbd        changes in equity
    jy          transactions
    gddh        shareholders' meeting
    cqfxyj      clarifications, risk warnings and earnings forecasts
    tbclts      special treatment and delisting
    bcgz        supplements and corrections
    zjjg        intermediary agency reports
    ssgszd      listed company rules and policies
    zqgg        bond announcements
    qtzdsx      other major matters
    tzzgx       investor relations information
    dshgg       board of directors announcements
    jshgg       board of supervisors announcements
    """
    start_dt = parse_dt(start, return_date=True)
    end_dt = parse_dt(end, return_date=True)

    # Fall back to the data directory stored on the click context when no
    # explicit path was given.
    if not save_path:
        save_path = ctx.obj["data_path"]

    # Tolerate blanks around the commas and ignore empty entries.
    category_codes = [code.strip() for code in category.split(",") if code.strip()]

    # ``get_announcements`` raises on failure by itself, so its return value
    # needs no further inspection here.
    get_announcements(
        symbol, category_codes, save_path, start_dt, end_dt, search_key=search or ""
    )


if __name__ == "__main__":
    announcements_cli()