# (Stray "link" text left over from the page this script was copied from.)

from lxml import etree

import pandas as pd

# — Load and parse BPMN file —

# Path of the BPMN XML file to document.
file_path = "bpmn_test.bpmn"

tree = etree.parse(file_path)
root = tree.getroot()

# The queries below all use the "bpmn:" prefix. A prefix map cannot contain a
# ``None`` key for path lookups, so the document's default namespace (if any)
# is registered under "bpmn".
namespaces = {k if k is not None else "bpmn": v for k, v in root.nsmap.items()}

# Documents that bind the BPMN model namespace to a different prefix
# (e.g. "bpmn2:") would otherwise leave "bpmn" unmapped; fall back to the
# BPMN 2.0 model namespace URI in that case.
namespaces.setdefault("bpmn", "http://www.omg.org/spec/BPMN/20100524/MODEL")

# — Extract Service Tasks in Subprocesses —

# One row per service task located anywhere below a subProcess element.
subprocess_data = []

for subprocess in root.findall(".//bpmn:subProcess", namespaces):
    subprocess_name = subprocess.get("name", "[no name]")

    for task in subprocess.findall(".//bpmn:serviceTask", namespaces):
        task_id = task.get("id")
        task_name = task.get("name", "[no name]")

        documentation = task.find(".//bpmn:documentation", namespaces)
        # An empty <documentation/> element has ``text`` of None; use "" so
        # the table does not show the literal "None".
        doc_text = (documentation.text or "") if documentation is not None else ""

        subprocess_data.append({
            "Subprocess": subprocess_name,
            "Task ID": task_id,
            "Task Name": task_name,
            "Documentation": doc_text,
        })

# Explicit columns keep the header row even when no service task was found.
df_subprocesses = pd.DataFrame(
    subprocess_data,
    columns=["Subprocess", "Task ID", "Task Name", "Documentation"],
)

# — Extract Exclusive Gateways —

# One row per exclusive gateway in the document.
gateway_data = []

for gateway in root.findall(".//bpmn:exclusiveGateway", namespaces):
    gateway_id = gateway.get("id")
    gateway_name = gateway.get("name", "[no name]")

    documentation = gateway.find(".//bpmn:documentation", namespaces)
    # An empty <documentation/> element has ``text`` of None; use "" so the
    # table does not show the literal "None".
    doc_text = (documentation.text or "") if documentation is not None else ""

    gateway_data.append({
        "Gateway ID": gateway_id,
        "Gateway Name": gateway_name,
        "Documentation": doc_text,
    })

# Explicit columns keep the header row even when no gateway was found.
df_gateways = pd.DataFrame(
    gateway_data,
    columns=["Gateway ID", "Gateway Name", "Documentation"],
)

# — Extract Start Events —

# One row per start event in the document.
start_events_data = []

for start in root.findall(".//bpmn:startEvent", namespaces):
    # Look the documentation element up once instead of twice per event.
    start_doc = start.find(".//bpmn:documentation", namespaces)

    start_events_data.append({
        "ID": start.get("id"),
        "Name": start.get("name", "[no name]"),
        # Tag text after the closing "}" of the "{namespace-uri}" part.
        "Type": start.tag.split("}")[1],
        # ``text`` is None for an empty element; show "" instead of "None".
        "Documentation": (start_doc.text or "") if start_doc is not None else "",
    })

# Explicit columns keep the header row even when no start event was found.
df_starts = pd.DataFrame(
    start_events_data,
    columns=["ID", "Name", "Type", "Documentation"],
)

# — Extract End Events —

# One row per end event in the document.
end_events_data = []

for end in root.findall(".//bpmn:endEvent", namespaces):
    # Look the documentation element up once instead of twice per event.
    end_doc = end.find(".//bpmn:documentation", namespaces)

    end_events_data.append({
        "ID": end.get("id"),
        "Name": end.get("name", "[no name]"),
        # Tag text after the closing "}" of the "{namespace-uri}" part.
        "Type": end.tag.split("}")[1],
        # ``text`` is None for an empty element; show "" instead of "None".
        "Documentation": (end_doc.text or "") if end_doc is not None else "",
    })

# Explicit columns keep the header row even when no end event was found.
df_ends = pd.DataFrame(
    end_events_data,
    columns=["ID", "Name", "Type", "Documentation"],
)

# — Extract User Tasks —

# One row per user task in the document.
user_tasks_data = []

for task in root.findall(".//bpmn:userTask", namespaces):
    # Look the documentation element up once instead of twice per task.
    task_doc = task.find(".//bpmn:documentation", namespaces)

    user_tasks_data.append({
        "ID": task.get("id"),
        "Name": task.get("name", "[no name]"),
        # ``text`` is None for an empty element; show "" instead of "None".
        "Documentation": (task_doc.text or "") if task_doc is not None else "",
    })

# Explicit columns keep the header row even when no user task was found.
df_users = pd.DataFrame(
    user_tasks_data,
    columns=["ID", "Name", "Documentation"],
)

# — Extract Sequence Flows —

# One row per sequence flow, with its condition expression text if present.
sequence_flows_data = []

for flow in root.findall(".//bpmn:sequenceFlow", namespaces):
    condition = flow.find(".//bpmn:conditionExpression", namespaces)

    sequence_flows_data.append({
        "ID": flow.get("id"),
        "Source": flow.get("sourceRef"),
        "Target": flow.get("targetRef"),
        # ``text`` is None for an empty element; show "" instead of "None".
        "Condition": (condition.text or "") if condition is not None else "",
    })

# Explicit columns keep the header row even when no sequence flow was found.
df_flows = pd.DataFrame(
    sequence_flows_data,
    columns=["ID", "Source", "Target", "Condition"],
)

# — Write to styled HTML —

# Name of the generated report file (written to the current directory).
output_html = "bpmn_full_documentation_styled.html"

# Section headings paired with the table rendered beneath each of them,
# in the order they appear in the report.
report_sections = [
    ("Service Tasks in Subprocesses", df_subprocesses),
    ("Exclusive Gateways", df_gateways),
    ("Start Events", df_starts),
    ("End Events", df_ends),
    ("User Tasks", df_users),
    ("Sequence Flows", df_flows),
]

with open(output_html, "w", encoding="utf-8") as f:
    # The file is written as UTF-8, so the page declares that charset;
    # without it a browser may mis-decode non-ASCII names and documentation.
    f.write("""
<html>
<head>
<meta charset="utf-8">
<style>
body { font-family: Arial, sans-serif; margin: 20px; background-color: #f9f9f9; }
h1 { color: #222222; }
h2 { color: #333333; margin-top: 40px; }
table { border-collapse: collapse; width: 100%; margin-bottom: 40px; background: #ffffff; }
th, td { border: 1px solid #cccccc; padding: 8px; text-align: left; }
th { background-color: #eeeeee; }
tr:nth-child(even) { background-color: #f2f2f2; }
</style>
</head>
<body>
<h1>BPMN Full Documentation</h1>
""")

    for section_title, section_frame in report_sections:
        f.write(f"<h2>{section_title}</h2>\n")
        # NOTE(review): escape=False writes cell text into the page verbatim,
        # so a "<" or "&" in a name, documentation or condition expression is
        # interpreted as markup. Kept as in the original; switch to the
        # default escaping unless raw HTML in the BPMN text is intended.
        f.write(section_frame.to_html(index=False, escape=False))

    f.write("</body></html>")

print(f"HTML file successfully created: {output_html}")