# -*- coding: utf-8 -*-
"""Generate mock adverse drug event & complaint data (1000 rows)."""

from __future__ import annotations

import random
from contextlib import closing
from datetime import date, timedelta
from pathlib import Path

from openpyxl import Workbook, load_workbook
from openpyxl.utils import get_column_letter


# Directory containing this script; the template is read from here and the
# generated workbook is written here.
ROOT = Path(__file__).resolve().parent
# Workbook whose first row supplies the column headers.
TEMPLATE = ROOT / "药品不良事件&投诉数据-表头.xlsx"

# --- Value pools sampled by make_row() -------------------------------------
CASE_OWNERS = ["PV专员-张", "PV专员-李", "药物警戒-王", "PV经理-陈"]
REPORTERS = ["销售-赵", "医学联络-周", "医院上报-吴", "客服-郑"]
# Inbound channel: phone / e-mail / sales feedback / institution / regulator.
INBOUND = ["电话", "邮件", "销售反馈", "医疗机构上报", "监管转入"]
# Feedback code: valid / needs more information / invalid.
FEEDBACK_CODE = ["有效", "待补充", "无效"]
# Initial vs. follow-up report.
FIRST_FOLLOW = ["首次", "跟踪"]
# Expected vs. unexpected.
EXPECTED = ["预期", "非预期"]
REPORT_TYPE = ["自发报告", "文献", "上市后研究", "其他"]
# Yes / no flag used for the seriousness column.
SERIOUS = ["是", "否"]
UNIT_TYPE = ["三级医院", "二级医院", "基层医疗机构", "药店", "其他"]
SEX = ["男", "女"]
ETHNIC = ["汉族", "回族", "满族", "壮族", "苗族", "其他"]
# Generic yes / no pool (not referenced by the functions in this file).
BOOL_YN = ["是", "否"]
# Yes / no / unknown / not re-administered.
YN_UNK = ["是", "否", "不详", "未再用"]
INFO_SRC = ["医务人员", "患者", "文献", "监管机构", "公司主动收集"]
# make_row() fills the companion free-text column only when "其他" is drawn.
IMPORTANT_INFO = ["肝功能异常", "肾功能异常", "妊娠", "过敏体质", "其他", "无"]
# Suspect drug vs. concomitant medication.
SUSPECT_COMBI = ["怀疑药品", "合并用药"]
DOSAGE_UNITS = ["mg", "ml", "片", "粒", "支"]
FREQ = ["qd", "bid", "tid", "qod", "每周一次"]
ROUTE = ["口服", "静脉滴注", "肌内注射", "皮下注射"]

# Product catalogue. Each entry carries the keys make_row() reads:
# approval, generic, brand, form, mfg, dose_range (low, high), dose_unit,
# route and indications.
DRUGS = [
    {
        "approval": "国药准字H20103001",
        "generic": "头孢呋辛酯片",
        "brand": "新福辛",
        "form": "片剂",
        "mfg": "贝朗制药(苏州)有限公司",
        "dose_range": (250, 500),
        "dose_unit": "mg",
        "route": "口服",
        "indications": ["呼吸道感染", "泌尿系感染"],
    },
    {
        "approval": "国药准字H20153077",
        "generic": "甲硝唑氯化钠注射液",
        "brand": "美舒宁",
        "form": "注射剂",
        "mfg": "贝朗医疗制药(上海)有限公司",
        "dose_range": (100, 250),
        "dose_unit": "ml",
        "route": "静脉滴注",
        "indications": ["腹腔感染", "术后感染预防"],
    },
    {
        "approval": "国药准字H20064211",
        "generic": "左氧氟沙星氯化钠注射液",
        "brand": "利复宁",
        "form": "注射剂",
        "mfg": "贝朗医疗制药(上海)有限公司",
        "dose_range": (100, 200),
        "dose_unit": "ml",
        "route": "静脉滴注",
        "indications": ["肺部感染", "皮肤软组织感染"],
    },
]

# (reaction term, English class name, Chinese class name).
# make_row() unpacks these as ``react_cn, pt, soc``.
# NOTE(review): the second field reads like a system-organ-class name rather
# than a preferred term - confirm against the template's column meaning.
REACTIONS = [
    ("皮疹", "Skin and subcutaneous tissue disorders", "皮肤及皮下组织类疾病"),
    ("恶心", "Gastrointestinal disorders", "胃肠系统疾病"),
    ("呕吐", "Gastrointestinal disorders", "胃肠系统疾病"),
    ("头晕", "Nervous system disorders", "神经系统疾病"),
    ("肝功能异常", "Hepatobiliary disorders", "肝胆系统疾病"),
    ("过敏反应", "Immune system disorders", "免疫系统疾病"),
    ("注射部位反应", "General disorders and administration site conditions", "全身性疾病及给药部位各种反应"),
]

# Outcome pool; make_row() pairs it positionally with sampling weights, so
# keep the order stable. The last entry ("死亡") is the fatal outcome.
RESULTS = ["痊愈", "好转", "未好转", "不详", "死亡"]
# Mild / moderate / severe.
SEVERITY = ["轻度", "中度", "重度"]
CONCLUSION = ["可能有关", "很可能有关", "无法评价", "待随访"]
# (province, city, district) triples sampled together so they stay consistent.
PROV_CITY_DIST = [
    ("上海市", "上海市", "浦东新区"),
    ("浙江省", "杭州市", "西湖区"),
    ("广东省", "广州市", "越秀区"),
    ("四川省", "成都市", "武侯区"),
    ("江苏省", "南京市", "鼓楼区"),
    ("山东省", "济南市", "历下区"),
]


def template_headers() -> list[str]:
    """Return the first-row cell values of the template's active worksheet.

    The workbook at ``TEMPLATE`` is opened read-only with ``data_only=True``
    and is always closed again, even if reading fails.

    Row 1 is read in a single pass instead of one ``ws.cell()`` lookup per
    column. This also avoids depending on ``ws.max_column``, which read-only
    worksheets report as ``None`` when the file carries no dimension record.

    Returns:
        The values of row 1 in column order, exactly as stored (a cell with
        no value yields ``None``). An empty list if no first row can be read.
    """
    with closing(load_workbook(TEMPLATE, read_only=True, data_only=True)) as wb:
        # Exhaust the one-row iterator inside the ``with`` so the sheet
        # source is fully consumed before the workbook is closed.
        first_rows = list(
            wb.active.iter_rows(min_row=1, max_row=1, values_only=True)
        )
    return list(first_rows[0]) if first_rows else []


def rand_date(rng: random.Random, start: date, end: date) -> date:
    """Return a random date drawn uniformly from the closed range [start, end].

    Args:
        rng: Random source; exactly one ``randint`` call is made on it.
        start: Earliest date that may be returned.
        end: Latest date that may be returned.

    Returns:
        ``start`` plus a whole number of days between 0 and
        ``(end - start).days`` inclusive.

    Raises:
        ValueError: If ``end`` is earlier than ``start``.
    """
    span_days = (end - start).days
    if span_days < 0:
        # randint() would also raise ValueError here, but with a message
        # about an empty range that hides which dates were wrong.
        raise ValueError(f"end ({end}) must not be earlier than start ({start})")
    return start + timedelta(days=rng.randint(0, span_days))


# Wording that ends the narrative sentence for each outcome, chosen so the
# free text never contradicts the outcome column.
_NARRATIVE_OUTCOME = {
    "痊愈": "好转",
    "好转": "好转",
    "未好转": "未见好转",
    "不详": "转归不详",
    "死亡": "加重",
}


def _years_before(day: date, years: int) -> date:
    """Return the calendar date ``years`` years before ``day`` (Feb 29 -> Feb 28)."""
    try:
        return day.replace(year=day.year - years)
    except ValueError:
        # ``day`` is Feb 29 and the target year is not a leap year.
        return day.replace(year=day.year - years, day=28)


def make_row(i: int, rng: random.Random, today: date) -> list:
    """Build one mock case record as a flat list of 70 cell values.

    The positions of the date values (1-based columns 4, 5, 7, 10, 18, 25,
    29, 36, 61 and 62) are relied on by ``main`` when it applies date number
    formats, so the order of the returned list must not change.

    Consistency rules applied to the generated values:

    * entry, submit, report and death dates never fall after ``today``;
    * the birth date is derived calendar-aware from the receipt date so that
      it implies exactly the age stored in the age column;
    * a fatal outcome is always flagged as serious;
    * the narrative wording follows the outcome column.

    Args:
        i: 1-based record number, embedded in the two case identifiers.
        rng: Random source used for every sampled value.
        today: Reference date; receipt dates fall 1 to 900 days before it.

    Returns:
        The record's cell values in template column order. The due date and
        the death fields are ``None`` when they do not apply.
    """
    # --- workflow dates (receipt <= entry <= submit <= report <= today) ---
    receipt = rand_date(rng, today - timedelta(days=900), today - timedelta(days=1))
    entry = min(receipt + timedelta(days=rng.randint(0, 3)), today)
    submit = min(entry + timedelta(days=rng.randint(0, 5)), today)
    report_ha = "是" if rng.random() < 0.85 else "否"
    # The due date is a deadline and may legitimately lie in the future.
    due_adr = receipt + timedelta(days=rng.randint(3, 15)) if report_ha == "是" else None

    first_follow = rng.choices(FIRST_FOLLOW, weights=[0.8, 0.2], k=1)[0]
    serious_ae = rng.choices(SERIOUS, weights=[0.2, 0.8], k=1)[0]

    # --- patient ---
    age = rng.randint(18, 85)
    # Step back whole calendar years, then up to 364 more days, so the age
    # at receipt is exactly ``age`` regardless of leap days.
    birth = _years_before(receipt, age) - timedelta(days=rng.randint(0, 364))
    weight = round(rng.uniform(45, 95), 1)

    # --- reaction and outcome ---
    reaction_date = receipt - timedelta(days=rng.randint(0, 20))

    result = rng.choices(RESULTS, weights=[0.4, 0.35, 0.15, 0.08, 0.02], k=1)[0]
    death_time = None
    death_cause = None
    if result == "死亡":
        death_time = min(reaction_date + timedelta(days=rng.randint(0, 15)), today)
        death_cause = rng.choice(["感染性休克", "呼吸衰竭", "多器官功能衰竭"])
        # A fatal case cannot be recorded as non-serious.
        serious_ae = "是"

    has_hist = rng.random() < 0.28
    has_family = rng.random() < 0.2

    react_cn, pt, soc = rng.choice(REACTIONS)
    drug = rng.choice(DRUGS)

    # --- medication course ---
    med_start = reaction_date - timedelta(days=rng.randint(1, 30))
    med_days = rng.randint(1, 30)
    med_end = med_start + timedelta(days=med_days - 1)

    serious_lv = "重度" if serious_ae == "是" else rng.choice(["轻度", "中度"])
    province, city, district = rng.choice(PROV_CITY_DIST)

    important = rng.choice(IMPORTANT_INFO)
    important_other = "合并高脂血症" if important == "其他" else ""

    report_date = min(submit + timedelta(days=rng.randint(0, 5)), today)

    brands = [d["brand"] for d in DRUGS]

    return [
        rng.choice(CASE_OWNERS),
        rng.choice(REPORTERS),
        rng.choice(INBOUND),
        receipt,
        entry,
        report_ha,
        due_adr,
        f"AER-{today:%Y}-{i:06d}",
        f"CC-{today:%Y}-{i:06d}",
        submit,
        rng.choice(FEEDBACK_CODE),
        first_follow,
        rng.choice(EXPECTED),
        rng.choice(REPORT_TYPE),
        serious_ae,
        rng.choice(UNIT_TYPE),
        rng.choice(SEX),
        birth,
        age,
        "岁",
        rng.choice(ETHNIC),
        weight,
        "有" if has_hist else "无",
        "有" if has_family else "无",
        reaction_date,
        f"患者用药后出现{react_cn},经对症处理后病情{_NARRATIVE_OUTCOME[result]}。",
        result,
        "无" if result in ("痊愈", "好转") else ("轻度皮肤色素沉着" if result == "未好转" else ""),
        death_time,
        death_cause,
        rng.choice(YN_UNK),
        rng.choice(YN_UNK),
        rng.choice(["无明显影响", "原患疾病短暂波动", "加重原患疾病"]),
        rng.choice(CONCLUSION),
        rng.choice(CONCLUSION),
        report_date,
        rng.choice(INFO_SRC),
        "模拟数据,仅用于分析演示",
        rng.choice(drug["indications"]),
        # Family-history block: only filled when has_family is set.
        react_cn if has_family else "",
        rng.choice(brands) if has_family else "",
        rng.choice(SEVERITY) if has_family else "",
        # Personal-history block: only filled when has_hist is set.
        react_cn if has_hist else "",
        rng.choice(SEVERITY) if has_hist else "",
        rng.choice(brands) if has_hist else "",
        important,
        important_other,
        rng.choices(SUSPECT_COMBI, weights=[0.7, 0.3], k=1)[0],
        1,
        drug["approval"],
        drug["generic"],
        drug["brand"],
        drug["form"],
        drug["mfg"],
        f"{rng.choice('ABCDEFGH')}{rng.randint(100000,999999)}",
        round(rng.uniform(*drug["dose_range"]), 1),
        # 85% of rows keep the drug's own unit / route; the rest draw any.
        drug["dose_unit"] if rng.random() < 0.85 else rng.choice(DOSAGE_UNITS),
        rng.choice(FREQ),
        med_days,
        drug["route"] if rng.random() < 0.85 else rng.choice(ROUTE),
        med_start,
        med_end,
        rng.choice(drug["indications"]),
        react_cn,
        serious_lv,
        pt,
        soc,
        province,
        city,
        district,
    ]


def main(n_rows: int = 1000) -> None:
    """Generate the mock workbook and save it next to this script.

    The header row comes from ``template_headers()``; each data row comes
    from ``make_row()``. The random generator is seeded with today's date as
    a ``YYYYMMDD`` integer, so runs on the same calendar day draw the same
    random sequence. The output path is printed once the file is saved.

    Args:
        n_rows: Number of data rows to generate. The same value drives the
            row loop, the date-format pass and the output file name.
    """
    headers = template_headers()
    today = date.today()
    rng = random.Random(int(today.strftime("%Y%m%d")))
    out = ROOT / f"药品不良事件&投诉数据-模拟{n_rows}条-{today:%Y%m%d}.xlsx"

    wb = Workbook()
    ws = wb.active
    ws.title = "Tabelle1"
    ws.append(headers)

    for i in range(1, n_rows + 1):
        ws.append(make_row(i, rng, today))

    # 1-based positions of the date values in the list make_row() returns.
    date_cols = (4, 5, 7, 10, 18, 25, 29, 36, 61, 62)
    # Data starts on sheet row 2, directly below the header row.
    for row_idx in range(2, n_rows + 2):
        for col_idx in date_cols:
            cell = ws.cell(row_idx, col_idx)
            # Optional date fields hold None and keep the default format.
            if isinstance(cell.value, date):
                cell.number_format = "YYYY-MM-DD"

    # Size every written column, even if the template has fewer headers than
    # a data row has values.
    for col_idx in range(1, max(len(headers), ws.max_column) + 1):
        ws.column_dimensions[get_column_letter(col_idx)].width = 15

    wb.save(out)
    print(f"Written: {out}")


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()