Data_Analysis/数据准备过程/模拟数据生成-提示词/generate_simulated_ae.py

321 lines
10 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""生成贝朗相关产品模拟不良事件数据(合成数据,仅供分析/培训)。
依据:贝朗数据/生成不良事件模拟数据.md
"""
from __future__ import annotations
import random
from datetime import date, timedelta
from pathlib import Path
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
# Directory that contains this script; main() writes the generated workbook here.
ROOT = Path(__file__).resolve().parent
# Header row of the output sheet, in column order (13 columns, A..M).
# build_rows() emits every data row in this same order.
# English gloss of each header, top to bottom:
#   A 报告编码                      report code
#   B CC                            (meaning not stated in this file; filled from CC_POOL)
#   C 单位名称                      reporting organization name
#   D 事业线                        business line
#   E 产品名称                      product name
#   F 注册证编号/曾用注册证编号     registration certificate no. / former certificate no.
#   G 注册人                        registrant
#   H 型号                          model
#   I 产品批号                      product batch number
#   J 伤害                          harm
#   K 伤害表现                      harm manifestation
#   L 器械故障表现                  device fault manifestation
#   M 审核日期                      review date
HEADERS = [
"报告编码",
"CC",
"单位名称",
"事业线",
"产品名称",
"注册证编号/曾用注册证编号",
"注册人",
"型号",
"产品批号",
"伤害",
"伤害表现",
"器械故障表现",
"审核日期",
]
# Product names: summarized from B. Braun China's common product lines and
# generic industry naming; real registration details are governed by NMPA
# records (this table is simulated).
#
# Each profile is a dict with these keys:
#   "line"    business line, written to the "事业线" column
#   "name"    product name, written to the "产品名称" column
#   "weight"  relative sampling weight; weighted_products() repeats the
#             profile this many times (the weights below sum to 1000)
#   "models"  candidate model strings, one is picked per row
#   "events"  (harm manifestation -> column K, device fault manifestation -> column L)
#             pairs, one pair is picked per row
#
# The original note also states that column J ("伤害") is fixed to "是"
# (see the prompt document).
# NOTE(review): confirm that the row builder actually writes that value.
PRODUCT_PROFILES: list[dict] = [
{
"line": "输液治疗",
"name": "一次性使用静脉留置针",
"weight": 180,
"models": ("4251624", "4251625", "Introcan Safety 20G", "Introcan 18G"),
"events": [
("穿刺部位疼痛", "套管与导管座连接处渗漏"),
("血肿", "回血观察窗模糊影响血流判断"),
("出血", "针尖保护装置回弹不畅"),
("红斑", "肝素帽旋紧后微量渗液"),
("瘙痒", "延长管打折致滴速下降"),
("水肿", "留置针固定翼粘贴失效"),
("静脉炎", "导管腔内回血阻力增高"),
("局部感染征象", "三通接头裂纹"),
],
},
{
"line": "输液治疗",
"name": "一次性使用输液器",
"weight": 160,
"models": ("IS-7.0", "IS-5.0", "4053000"),
"events": [
("输液部位疼痛", "滴斗液面波动异常"),
("空气栓塞相关症状(已处理)", "管路接头松动致滴管内进气"),
("心悸", "流量调节轮锁止不良"),
("瘙痒", "精密过滤器外壳裂纹"),
("渗漏致皮肤红斑", "穿刺器与药瓶胶塞密封不严"),
("恶心", "滴速过快相关不适"),
("寒战", "输液管路可见异物附着"),
],
},
{
"line": "输液治疗",
"name": "一次性使用精密过滤输液器",
"weight": 90,
"models": ("PF-5.0", "PF-7.0"),
"events": [
("低血压", "过滤膜侧压力升高致滴速骤降"),
("头痛", "排气孔堵塞需二次排气"),
("胸闷", "侧孔进气不畅"),
("发热", "过滤器下游管路温度异常升高"),
],
},
{
"line": "输液治疗",
"name": "一次性使用输注延长管",
"weight": 70,
"models": ("EX-50cm", "EX-100cm"),
"events": [
("输注中断相关焦虑", "螺旋接口与泵管不匹配"),
("药物输注延迟", "延长管扭转触发流量报警"),
("局部肿胀", "接口渗液"),
],
},
{
"line": "输液治疗",
"name": "一次性使用肠内营养输液器",
"weight": 50,
"models": ("EN-SET-1.2", "EN-SET-2.0"),
"events": [
("腹胀", "营养袋接口与泵管连接处渗漏"),
("呕吐", "滴速传感器识别不稳定"),
("腹泻", "营养液温度偏低相关不适"),
],
},
{
"line": "透析产品",
"name": "血液透析器",
"weight": 120,
"models": ("Diacap Pro 1.3", "Diacap 1.4", "HD-180"),
"events": [
("透析中低血压", "跨膜压波动偏大"),
("出血", "动脉端管路接头渗液"),
("头痛", "透析液侧压力传感器报警"),
("肌肉痉挛", "透析器外壳细微裂纹"),
("恶心", "超滤率设置与监测不一致"),
("胸痛", "静脉压升高报警"),
],
},
{
"line": "透析产品",
"name": "血液透析浓缩液",
"weight": 60,
"models": ("BIC-35", "ACID-8L"),
"events": [
("恶心", "浓缩液桶盖密封条变形渗漏"),
("呕吐", "电导度监测短暂漂移"),
("低血压", "透析液成分配比异常相关不适"),
],
},
{
"line": "透析产品",
"name": "血液透析管路",
"weight": 55,
"models": ("AV-SET-A", "AV-SET-P"),
"events": [
("失血相关血红蛋白下降", "动脉壶液面持续下降"),
("出血", "泵管段磨损起皱"),
("寒战", "管路预冲残留气泡"),
],
},
{
"line": "外科产品",
"name": "可吸收性外科缝线",
"weight": 75,
"models": ("Novosyn 3-0", "Monosyn 4-0"),
"events": [
("切口裂开", "缝线结滑脱"),
("疼痛", "缝针弯曲"),
("出血", "线体断裂残留"),
("感染征象", "缝线张力过早丧失"),
],
},
{
"line": "外科产品",
"name": "非吸收性外科缝线",
"weight": 50,
"models": ("Premilene 3-0", "Dafilon 5-0"),
"events": [
("异物感", "线结切割组织"),
("水肿", "缝针与线体连接处松动"),
("红斑", "缝线表面粗糙刺激"),
],
},
{
"line": "外科产品",
"name": "一次性使用无菌手术膜",
"weight": 35,
"models": ("OP-FILM-45", "OP-FILM-60"),
"events": [
("皮肤红斑", "粘性不足边缘翘起"),
("疼痛", "去除敷料时皮肤撕脱"),
("瘙痒", "敷料下汗液积聚刺激"),
],
},
{
"line": "诊断/监测耗材",
"name": "一次性使用动脉采血器",
"weight": 30,
"models": ("ABG-3ml", "SAFE-ABG"),
"events": [
("血肿", "针头保护套脱落困难"),
("出血", "肝素化不足标本凝固需重新采血"),
("疼痛", "采血后桡动脉痉挛"),
],
},
{
"line": "输液治疗",
"name": "一次性使用无菌注射器",
"weight": 25,
"models": ("10ml Luer", "20ml Luer"),
"events": [
("疼痛", "推杆卡顿致推注阻力骤增"),
("给药剂量偏差相关不适", "刻度印刷模糊"),
("局部肿胀", "针头与针座连接处渗漏"),
],
},
]
# Candidate registrant names; build_rows() picks one at random for the
# "注册人" column of every row.
REGISTRANTS = (
"贝朗医疗(上海)国际贸易有限公司",
"贝朗爱敦(上海)医疗管理有限公司",
"贝朗医疗(苏州)有限公司",
)
# Candidate hospital names; build_rows() picks one at random for the
# "单位名称" column of every row.
HOSPITALS = (
"上海市第一人民医院",
"浙江大学医学院附属第二医院",
"四川大学华西医院",
"广东省人民医院",
"华中科技大学同济医学院附属协和医院",
"中南大学湘雅医院",
"山东大学齐鲁医院",
"中国医科大学附属第一医院",
"西安交通大学第一附属医院",
"南京鼓楼医院",
"福建省立医院",
"重庆医科大学附属第一医院",
"天津市肿瘤医院",
"郑州大学第一附属医院",
"昆明医科大学第一附属医院",
)
# Candidate values for the "CC" column; build_rows() picks one at random per row.
# NOTE(review): the file does not say what "CC" stands for — confirm with the
# prompt document before relying on these categories.
CC_POOL = (
"质量反馈",
"临床使用",
"包装标识",
"灭菌外观",
"物流储运",
"培训咨询",
"不良事件",
)
def weighted_products() -> list[dict]:
    """Return the product profiles expanded according to their weights.

    Every entry of ``PRODUCT_PROFILES`` appears ``weight`` times (as the same
    dict object, in definition order), so a uniform ``rng.choice`` over the
    returned list selects products in proportion to their weights.

    Returns:
        A flat list of profile dicts whose length is the sum of all weights.
    """
    return [
        profile
        for profile in PRODUCT_PROFILES
        for _ in range(profile["weight"])
    ]
def random_reg_number(rng: random.Random) -> str:
    """Return a simulated registration-certificate number.

    The result is one of four fixed prefixes followed by a single digit in
    5..9 and a six-digit number in 100000..999999. The value is synthetic
    and is not checked against any real registry.

    Args:
        rng: Random generator to draw from; it is advanced by three draws.

    Returns:
        The assembled certificate-number string.
    """
    prefixes = ("国械注进20", "国械注进201", "国械注准20", "国械注准201")
    # The draw order (prefix, digit, serial) is part of the contract: a seeded
    # generator must keep producing the same sequence of numbers.
    prefix = rng.choice(prefixes)
    digit = rng.randint(5, 9)
    serial = rng.randint(100000, 999999)
    return "".join((prefix, str(digit), str(serial)))
def random_batch(rng: random.Random) -> str:
    """Return a simulated batch number: one letter followed by seven digits.

    The letter comes from ``ABCDEFGHJK``; the digits are a six-digit number in
    100000..999999 plus one extra trailing digit. This function always draws a
    fresh value; re-using the previous batch number for roughly 1% of rows
    (to mimic same-batch clustering) is done by the caller, ``build_rows``.

    Args:
        rng: Random generator to draw from; it is advanced by three draws.

    Returns:
        The eight-character batch string.
    """
    # Keep the draw order (letter, body, trailing digit) stable so seeded runs
    # reproduce the same batch numbers.
    letter = rng.choice("ABCDEFGHJK")
    body = rng.randint(100000, 999999)
    trailing = rng.choice("0123456789")
    return "%s%d%s" % (letter, body, trailing)
def random_workday(rng: random.Random, end: date) -> date:
    """Return a random Monday-to-Friday date in the ~28 months before ``end``.

    A day is drawn uniformly from the 850-day window ending at ``end``
    (inclusive on both sides). If it falls on a weekend it is moved back to
    the preceding Friday, so Fridays are over-represented and the result can
    land up to two days before the window start.

    Args:
        rng: Random generator to draw from; it is advanced by one draw.
        end: Last date of the sampling window.

    Returns:
        A date whose ``weekday()`` is in 0..4.
    """
    window_days = 365 * 2 + 120
    window_start = end - timedelta(days=window_days)
    picked = window_start + timedelta(days=rng.randint(0, window_days))
    # weekday(): Saturday == 5, Sunday == 6; step back 1 or 2 days to Friday.
    days_past_friday = picked.weekday() - 4
    if days_past_friday > 0:
        picked -= timedelta(days=days_past_friday)
    return picked
def build_rows(n: int, rng: random.Random, end_d: date) -> list[list]:
    """Build ``n`` simulated adverse-event rows in ``HEADERS`` column order.

    For every row a product profile is drawn in proportion to its weight,
    then one (harm manifestation, device fault) pair, a batch number, and the
    remaining random fields. Report codes run ``SIM-2024-000001`` upward.

    Args:
        n: Number of rows to generate; values below 1 yield an empty list.
        rng: Random generator used for every draw; pass a seeded instance
            for reproducible output.
        end_d: Latest possible review date (see ``random_workday``).

    Returns:
        A list of 13-element lists, one per row, matching ``HEADERS``.
    """
    pool = weighted_products()
    rows: list[list] = []
    prev_batch: str | None = None
    for i in range(1, n + 1):
        prof = rng.choice(pool)
        harm_k, device_fault = rng.choice(prof["events"])
        code = f"SIM-2024-{i:06d}"
        # About 1% of rows repeat the previous row's batch number to mimic
        # several reports clustering on the same batch.
        if prev_batch is not None and rng.random() < 0.01:
            batch = prev_batch
        else:
            batch = random_batch(rng)
        prev_batch = batch
        # The list elements are evaluated in order, which fixes the sequence
        # of RNG draws; keep this order to preserve seeded reproducibility.
        rows.append(
            [
                code,
                rng.choice(CC_POOL),
                rng.choice(HOSPITALS),
                prof["line"],
                prof["name"],
                random_reg_number(rng),
                rng.choice(REGISTRANTS),
                rng.choice(prof["models"]),
                batch,
                # Column J ("伤害") is fixed to "是" as stated in the data
                # spec note above PRODUCT_PROFILES; every row carries a harm
                # manifestation, so the harm flag must not be left blank.
                "是",
                harm_k,
                device_fault,
                random_workday(rng, end_d),
            ]
        )
    return rows
def main(n_rows: int = 1000) -> None:
    """Generate the simulated adverse-event workbook and save it beside this script.

    The random generator is seeded from today's date (``YYYYMMDD`` as an
    integer), so repeated runs on the same day draw the same random sequence.
    The file is written to ``ROOT`` and named after the row count and date.

    Args:
        n_rows: Number of data rows to generate. Defaults to 1000, which
            reproduces the original fixed-size output and file name.
    """
    end_d = date.today()
    rng = random.Random(int(end_d.strftime("%Y%m%d")))
    # The row count appears in the file name, the generator call and the
    # formatting range; deriving all three from n_rows keeps them in sync.
    out_name = f"不良事件数据-模拟{n_rows}条-{end_d:%Y%m%d}.xlsx"
    out_path = ROOT / out_name

    wb = Workbook()
    ws = wb.active
    # Guard in case the workbook exposes no active sheet.
    assert ws is not None
    ws.title = "POWER BI 总信息"
    ws.append(HEADERS)
    for row in build_rows(n_rows, rng, end_d):
        ws.append(row)

    # Apply an explicit date display format to the review-date column
    # (data rows start at sheet row 2, below the header).
    date_col = HEADERS.index("审核日期") + 1
    for r in range(2, n_rows + 2):
        cell = ws.cell(row=r, column=date_col)
        if isinstance(cell.value, date):
            cell.number_format = "YYYY-MM-DD"

    # Width grows with header length and is capped at 28.
    for j, header in enumerate(HEADERS, start=1):
        ws.column_dimensions[get_column_letter(j)].width = min(28, 12 + len(header) // 2)

    wb.save(out_path)
    print(f"Written: {out_path}")


if __name__ == "__main__":
    main()