# Extracts selected BPMN elements from a .bpmn file and writes them to a styled HTML report.
from lxml import etree
import pandas as pd
# --- Load and parse BPMN file ---
file_path = "bpmn_test.bpmn"
tree = etree.parse(file_path)
root = tree.getroot()

# Every lookup in this script uses the "bpmn" prefix, so make sure it is
# always defined. Prefixes declared on the root element are kept as-is; the
# default (prefix-less) namespace cannot be referenced by name and is dropped.
namespaces = {
    prefix: uri for prefix, uri in root.nsmap.items() if prefix is not None
}
# If the file does not declare a "bpmn" prefix itself, fall back to its
# default namespace, and finally to the standard BPMN 2.0 model namespace
# (covers files that use another prefix such as "bpmn2:" or "semantic:").
namespaces.setdefault(
    "bpmn",
    root.nsmap.get(None, "http://www.omg.org/spec/BPMN/20100524/MODEL"),
)
# --- Extract Service Tasks in Subprocesses ---
# One row per (subProcess, serviceTask) pair.
# NOTE: both searches are descendant searches, so a service task inside a
# nested subprocess is listed once for every enclosing subProcess.
subprocess_data = []
for subprocess in root.findall(".//bpmn:subProcess", namespaces):
    subprocess_name = subprocess.get("name", "[no name]")
    for task in subprocess.findall(".//bpmn:serviceTask", namespaces):
        task_id = task.get("id")
        task_name = task.get("name", "[no name]")
        documentation = task.find(".//bpmn:documentation", namespaces)
        # .text is None for an empty <documentation/> element; show "" rather
        # than the literal string "None" in the report.
        doc_text = (documentation.text or "") if documentation is not None else ""
        subprocess_data.append({
            "Subprocess": subprocess_name,
            "Task ID": task_id,
            "Task Name": task_name,
            "Documentation": doc_text,
        })
# Fixed column list keeps the table header even when no rows were found.
df_subprocesses = pd.DataFrame(
    subprocess_data,
    columns=["Subprocess", "Task ID", "Task Name", "Documentation"],
)
# --- Extract Exclusive Gateways ---
# One row per exclusiveGateway found anywhere in the document.
gateway_data = []
for gateway in root.findall(".//bpmn:exclusiveGateway", namespaces):
    gateway_id = gateway.get("id")
    gateway_name = gateway.get("name", "[no name]")
    documentation = gateway.find(".//bpmn:documentation", namespaces)
    # .text is None for an empty <documentation/> element; show "" rather
    # than the literal string "None" in the report.
    doc_text = (documentation.text or "") if documentation is not None else ""
    gateway_data.append({
        "Gateway ID": gateway_id,
        "Gateway Name": gateway_name,
        "Documentation": doc_text,
    })
# Fixed column list keeps the table header even when no rows were found.
df_gateways = pd.DataFrame(
    gateway_data,
    columns=["Gateway ID", "Gateway Name", "Documentation"],
)
# --- Extract Start Events ---
# One row per startEvent found anywhere in the document.
start_events_data = []
for start in root.findall(".//bpmn:startEvent", namespaces):
    # Look the documentation element up once instead of twice per event.
    documentation = start.find(".//bpmn:documentation", namespaces)
    start_events_data.append({
        "ID": start.get("id"),
        "Name": start.get("name", "[no name]"),
        # Local tag name: the part after the "{namespace-uri}" prefix.
        "Type": start.tag.split("}")[1],
        # .text is None for an empty <documentation/> element; show "".
        "Documentation": (documentation.text or "") if documentation is not None else "",
    })
# Fixed column list keeps the table header even when no rows were found.
df_starts = pd.DataFrame(
    start_events_data,
    columns=["ID", "Name", "Type", "Documentation"],
)
# --- Extract End Events ---
# One row per endEvent found anywhere in the document.
end_events_data = []
for end in root.findall(".//bpmn:endEvent", namespaces):
    # Look the documentation element up once instead of twice per event.
    documentation = end.find(".//bpmn:documentation", namespaces)
    end_events_data.append({
        "ID": end.get("id"),
        "Name": end.get("name", "[no name]"),
        # Local tag name: the part after the "{namespace-uri}" prefix.
        "Type": end.tag.split("}")[1],
        # .text is None for an empty <documentation/> element; show "".
        "Documentation": (documentation.text or "") if documentation is not None else "",
    })
# Fixed column list keeps the table header even when no rows were found.
df_ends = pd.DataFrame(
    end_events_data,
    columns=["ID", "Name", "Type", "Documentation"],
)
# --- Extract User Tasks ---
# One row per userTask found anywhere in the document.
user_tasks_data = []
for task in root.findall(".//bpmn:userTask", namespaces):
    # Look the documentation element up once instead of twice per task.
    documentation = task.find(".//bpmn:documentation", namespaces)
    user_tasks_data.append({
        "ID": task.get("id"),
        "Name": task.get("name", "[no name]"),
        # .text is None for an empty <documentation/> element; show "".
        "Documentation": (documentation.text or "") if documentation is not None else "",
    })
# Fixed column list keeps the table header even when no rows were found.
df_users = pd.DataFrame(
    user_tasks_data,
    columns=["ID", "Name", "Documentation"],
)
# --- Extract Sequence Flows ---
# One row per sequenceFlow, with its source/target references and the text
# of its conditionExpression child (empty when the flow is unconditional).
sequence_flows_data = []
for flow in root.findall(".//bpmn:sequenceFlow", namespaces):
    condition = flow.find(".//bpmn:conditionExpression", namespaces)
    sequence_flows_data.append({
        "ID": flow.get("id"),
        "Source": flow.get("sourceRef"),
        "Target": flow.get("targetRef"),
        # .text is None for an empty <conditionExpression/>; show "" rather
        # than the literal string "None" in the report.
        "Condition": (condition.text or "") if condition is not None else "",
    })
# Fixed column list keeps the table header even when no rows were found.
df_flows = pd.DataFrame(
    sequence_flows_data,
    columns=["ID", "Source", "Target", "Condition"],
)
# --- Write to styled HTML ---
# Emits one <h2> heading plus one table per extracted DataFrame, wrapped in a
# minimal page with inline CSS.
output_html = "bpmn_full_documentation_styled.html"
with open(output_html, "w", encoding="utf-8") as f:
    # The charset declaration matches the encoding the file is written with,
    # so non-ASCII names/documentation display correctly in the browser.
    f.write("""
<html>
<head>
<meta charset="utf-8">
<style>
body { font-family: Arial, sans-serif; margin: 20px; background-color: #f9f9f9; }
h1 { color: #222222; }
h2 { color: #333333; margin-top: 40px; }
table { border-collapse: collapse; width: 100%; margin-bottom: 40px; background: #ffffff; }
th, td { border: 1px solid #cccccc; padding: 8px; text-align: left; }
th { background-color: #eeeeee; }
tr:nth-child(even) { background-color: #f2f2f2; }
</style>
</head>
<body>
<h1>BPMN Full Documentation</h1>
""")
    sections = [
        ("Service Tasks in Subprocesses", df_subprocesses),
        ("Exclusive Gateways", df_gateways),
        ("Start Events", df_starts),
        ("End Events", df_ends),
        ("User Tasks", df_users),
        ("Sequence Flows", df_flows),
    ]
    for title, frame in sections:
        f.write(f"<h2>{title}</h2>\n")
        # Cell values come straight from the BPMN file (names, documentation,
        # condition expressions such as "${x < 5 && y}"), so leave pandas'
        # default HTML escaping on; unescaped "<", ">" and "&" would corrupt
        # the table markup.
        f.write(frame.to_html(index=False))
    f.write("</body></html>")
print(f"HTML file successfully created: {output_html}")