# -*- coding: utf-8 -*-
"""Generate simulated drug adverse-event & complaint data (1000 rows by default).

The header row is copied from the template workbook that sits next to this
script; each data row is produced by :func:`make_row`. The random generator is
seeded with the current date, so repeated runs on the same day produce the
same workbook content.
"""

from __future__ import annotations

import random
from datetime import date, timedelta
from pathlib import Path

from openpyxl import Workbook, load_workbook
from openpyxl.utils import get_column_letter

ROOT = Path(__file__).resolve().parent
TEMPLATE = ROOT / "药品不良事件&投诉数据-表头.xlsx"

# Output settings.
DEFAULT_ROW_COUNT = 1000
DATE_NUMBER_FORMAT = "YYYY-MM-DD"
COLUMN_WIDTH = 15

# Value pools the generator draws from.
CASE_OWNERS = ["PV专员-张", "PV专员-李", "药物警戒-王", "PV经理-陈"]
REPORTERS = ["销售-赵", "医学联络-周", "医院上报-吴", "客服-郑"]
INBOUND = ["电话", "邮件", "销售反馈", "医疗机构上报", "监管转入"]
FEEDBACK_CODE = ["有效", "待补充", "无效"]
FIRST_FOLLOW = ["首次", "跟踪"]
EXPECTED = ["预期", "非预期"]
REPORT_TYPE = ["自发报告", "文献", "上市后研究", "其他"]
SERIOUS = ["是", "否"]
UNIT_TYPE = ["三级医院", "二级医院", "基层医疗机构", "药店", "其他"]
SEX = ["男", "女"]
ETHNIC = ["汉族", "回族", "满族", "壮族", "苗族", "其他"]
BOOL_YN = ["是", "否"]
YN_UNK = ["是", "否", "不详", "未再用"]
INFO_SRC = ["医务人员", "患者", "文献", "监管机构", "公司主动收集"]
IMPORTANT_INFO = ["肝功能异常", "肾功能异常", "妊娠", "过敏体质", "其他", "无"]
SUSPECT_COMBI = ["怀疑药品", "合并用药"]
DOSAGE_UNITS = ["mg", "ml", "片", "粒", "支"]
FREQ = ["qd", "bid", "tid", "qod", "每周一次"]
ROUTE = ["口服", "静脉滴注", "肌内注射", "皮下注射"]

DRUGS = [
    {
        "approval": "国药准字H20103001",
        "generic": "头孢呋辛酯片",
        "brand": "新福辛",
        "form": "片剂",
        "mfg": "贝朗制药(苏州)有限公司",
        "dose_range": (250, 500),
        "dose_unit": "mg",
        "route": "口服",
        "indications": ["呼吸道感染", "泌尿系感染"],
    },
    {
        "approval": "国药准字H20153077",
        "generic": "甲硝唑氯化钠注射液",
        "brand": "美舒宁",
        "form": "注射剂",
        "mfg": "贝朗医疗制药(上海)有限公司",
        "dose_range": (100, 250),
        "dose_unit": "ml",
        "route": "静脉滴注",
        "indications": ["腹腔感染", "术后感染预防"],
    },
    {
        "approval": "国药准字H20064211",
        "generic": "左氧氟沙星氯化钠注射液",
        "brand": "利复宁",
        "form": "注射剂",
        "mfg": "贝朗医疗制药(上海)有限公司",
        "dose_range": (100, 200),
        "dose_unit": "ml",
        "route": "静脉滴注",
        "indications": ["肺部感染", "皮肤软组织感染"],
    },
]

# (reaction name, English term, Chinese system-organ-class label)
REACTIONS = [
    ("皮疹", "Skin and subcutaneous tissue disorders", "皮肤及皮下组织类疾病"),
    ("恶心", "Gastrointestinal disorders", "胃肠系统疾病"),
    ("呕吐", "Gastrointestinal disorders", "胃肠系统疾病"),
    ("头晕", "Nervous system disorders", "神经系统疾病"),
    ("肝功能异常", "Hepatobiliary disorders", "肝胆系统疾病"),
    ("过敏反应", "Immune system disorders", "免疫系统疾病"),
    (
        "注射部位反应",
        "General disorders and administration site conditions",
        "全身性疾病及给药部位各种反应",
    ),
]

RESULTS = ["痊愈", "好转", "未好转", "不详", "死亡"]
SEVERITY = ["轻度", "中度", "重度"]
CONCLUSION = ["可能有关", "很可能有关", "无法评价", "待随访"]

# (province, city, district)
PROV_CITY_DIST = [
    ("上海市", "上海市", "浦东新区"),
    ("浙江省", "杭州市", "西湖区"),
    ("广东省", "广州市", "越秀区"),
    ("四川省", "成都市", "武侯区"),
    ("江苏省", "南京市", "鼓楼区"),
    ("山东省", "济南市", "历下区"),
]


def template_headers() -> list[str]:
    """Return the values of the first row of the template's active sheet.

    The row is read through ``iter_rows`` rather than ``ws.max_column``
    because a read-only worksheet may report ``max_column`` as ``None`` when
    the file carries no dimension information. The workbook is closed even
    if reading fails.

    Returns:
        The first-row cell values in column order; an empty list when the
        sheet has no rows.
    """
    wb = load_workbook(TEMPLATE, read_only=True, data_only=True)
    try:
        ws = wb.active
        first_row = next(
            ws.iter_rows(min_row=1, max_row=1, values_only=True), ()
        )
        return list(first_row)
    finally:
        wb.close()


def rand_date(rng: random.Random, start: date, end: date) -> date:
    """Return a random date in the inclusive range ``[start, end]``.

    Args:
        rng: Random generator to draw from (exactly one ``randint`` call).
        start: Earliest date that may be returned.
        end: Latest date that may be returned.

    Raises:
        ValueError: If ``end`` is earlier than ``start`` (raised by
            ``rng.randint`` for an empty range).
    """
    return start + timedelta(days=rng.randint(0, (end - start).days))


def make_row(i: int, rng: random.Random, today: date) -> list:
    """Build one simulated data row (70 values, in output-column order).

    The order of ``rng`` calls is significant: it determines the values
    produced for a given seed, so the draws below must not be reordered.

    Args:
        i: 1-based row sequence number, embedded in the ``AER-``/``CC-`` ids.
        rng: Random generator used for every random draw.
        today: Reference date; the receipt date falls 1 to 900 days before it.

    Returns:
        A list of cell values; dates are ``datetime.date`` objects and
        optional dates are ``None`` when not applicable.
    """
    receipt = rand_date(rng, today - timedelta(days=900), today - timedelta(days=1))
    entry = receipt + timedelta(days=rng.randint(0, 3))
    submit = entry + timedelta(days=rng.randint(0, 5))

    report_ha = "是" if rng.random() < 0.85 else "否"
    due_adr = (
        receipt + timedelta(days=rng.randint(3, 15)) if report_ha == "是" else None
    )
    first_follow = rng.choices(FIRST_FOLLOW, weights=[0.8, 0.2], k=1)[0]
    serious_ae = rng.choices(SERIOUS, weights=[0.2, 0.8], k=1)[0]

    age = rng.randint(18, 85)
    birth = receipt - timedelta(days=365 * age + rng.randint(0, 364))
    weight = round(rng.uniform(45, 95), 1)

    # The reaction is at most 20 days before receipt, i.e. never more than
    # 920 days before ``today``, so no lower-bound clamp is needed.
    reaction_date = receipt - timedelta(days=rng.randint(0, 20))

    result = rng.choices(RESULTS, weights=[0.4, 0.35, 0.15, 0.08, 0.02], k=1)[0]
    death_time = None
    death_cause = None
    if result == "死亡":
        death_time = reaction_date + timedelta(days=rng.randint(0, 15))
        death_cause = rng.choice(["感染性休克", "呼吸衰竭", "多器官功能衰竭"])

    has_hist = rng.random() < 0.28
    has_family = rng.random() < 0.2

    react_cn, pt, soc = rng.choice(REACTIONS)
    drug = rng.choice(DRUGS)

    med_start = reaction_date - timedelta(days=rng.randint(1, 30))
    med_days = rng.randint(1, 30)
    med_end = med_start + timedelta(days=med_days - 1)

    serious_lv = "重度" if serious_ae == "是" else rng.choice(["轻度", "中度"])
    province, city, district = rng.choice(PROV_CITY_DIST)
    important = rng.choice(IMPORTANT_INFO)
    important_other = "合并高脂血症" if important == "其他" else ""
    report_date = submit + timedelta(days=rng.randint(0, 5))

    # Built once per row instead of once per use; involves no random draw.
    brands = [d["brand"] for d in DRUGS]

    return [
        rng.choice(CASE_OWNERS),
        rng.choice(REPORTERS),
        rng.choice(INBOUND),
        receipt,
        entry,
        report_ha,
        due_adr,
        f"AER-{today:%Y}-{i:06d}",
        f"CC-{today:%Y}-{i:06d}",
        submit,
        rng.choice(FEEDBACK_CODE),
        first_follow,
        rng.choice(EXPECTED),
        rng.choice(REPORT_TYPE),
        serious_ae,
        rng.choice(UNIT_TYPE),
        rng.choice(SEX),
        birth,
        age,
        "岁",
        rng.choice(ETHNIC),
        weight,
        "有" if has_hist else "无",
        "有" if has_family else "无",
        reaction_date,
        f"患者用药后出现{react_cn},经对症处理后病情{'好转' if result != '死亡' else '加重'}。",
        result,
        (
            "无"
            if result in ("痊愈", "好转")
            else ("轻度皮肤色素沉着" if result == "未好转" else "")
        ),
        death_time,
        death_cause,
        rng.choice(YN_UNK),
        rng.choice(YN_UNK),
        rng.choice(["无明显影响", "原患疾病短暂波动", "加重原患疾病"]),
        rng.choice(CONCLUSION),
        rng.choice(CONCLUSION),
        report_date,
        rng.choice(INFO_SRC),
        "模拟数据,仅用于分析演示",
        rng.choice(drug["indications"]),
        # Filled only when has_family is true.
        react_cn if has_family else "",
        rng.choice(brands) if has_family else "",
        rng.choice(SEVERITY) if has_family else "",
        # Filled only when has_hist is true.
        react_cn if has_hist else "",
        rng.choice(SEVERITY) if has_hist else "",
        rng.choice(brands) if has_hist else "",
        important,
        important_other,
        rng.choices(SUSPECT_COMBI, weights=[0.7, 0.3], k=1)[0],
        1,
        drug["approval"],
        drug["generic"],
        drug["brand"],
        drug["form"],
        drug["mfg"],
        f"{rng.choice('ABCDEFGH')}{rng.randint(100000, 999999)}",
        round(rng.uniform(*drug["dose_range"]), 1),
        # 85% of rows use the drug's own unit/route; the rest draw any value.
        drug["dose_unit"] if rng.random() < 0.85 else rng.choice(DOSAGE_UNITS),
        rng.choice(FREQ),
        med_days,
        drug["route"] if rng.random() < 0.85 else rng.choice(ROUTE),
        med_start,
        med_end,
        rng.choice(drug["indications"]),
        react_cn,
        serious_lv,
        pt,
        soc,
        province,
        city,
        district,
    ]


def main(n_rows: int = DEFAULT_ROW_COUNT) -> None:
    """Write the simulated workbook next to this script.

    Args:
        n_rows: Number of data rows to generate (the header row is extra).
            Also embedded in the output file name.
    """
    headers = template_headers()
    today = date.today()
    # Seeded with today's date so same-day runs are reproducible.
    rng = random.Random(int(today.strftime("%Y%m%d")))
    out = ROOT / f"药品不良事件&投诉数据-模拟{n_rows}条-{today:%Y%m%d}.xlsx"

    wb = Workbook()
    ws = wb.active
    ws.title = "Tabelle1"
    ws.append(headers)

    for i in range(1, n_rows + 1):
        row = make_row(i, rng, today)
        ws.append(row)
        # Row 1 is the header, so data row i lands on sheet row i + 1.
        sheet_row = i + 1
        # Format every date cell by value type instead of a hard-coded list
        # of column indices, so the formatting cannot drift from make_row.
        for col, value in enumerate(row, start=1):
            if isinstance(value, date):
                ws.cell(sheet_row, col).number_format = DATE_NUMBER_FORMAT

    for col in range(1, len(headers) + 1):
        ws.column_dimensions[get_column_letter(col)].width = COLUMN_WIDTH

    wb.save(out)
    print(f"Written: {out}")


if __name__ == "__main__":
    main()