Skip to content

Commit ccbe9d9

Browse files
committed
Add the script that prepares the measure and message JSON files from the Excel template file.
1 parent 52e1df2 commit ccbe9d9

3 files changed

Lines changed: 224 additions & 0 deletions

File tree

script/kb.xlsx

29.3 KB
Binary file not shown.

script/knowledge_base_generator.py

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
2+
import argparse
3+
import pandas as pd
4+
import json
5+
import os
6+
7+
# Causal pathway names, as written in the "Compatible Causal Pathway" column,
# mapped to the JSON-LD reference stored in each message's has_causal_pathway.
_CAUSAL_PATHWAYS = {
    "Social better": {
        "@id": "https://repo.metadatacenter.org/template-instances/8302e75d-ab21-4613-b94a-89c0eaf54c39",
        "schema:name": "Social Better",
    },
    "Social gain": {
        "@id": "https://repo.metadatacenter.org/template-instances/46d968d8-762f-4870-bb05-3321a9b69f14",
        "schema:name": "Social Gain",
    },
    "Social worse": {
        "@id": "https://repo.metadatacenter.org/template-instances/f4006042-5750-4f6c-a039-fe0a88466464",
        "schema:name": "Social Worse",
    },
    "Social loss": {
        "@id": "https://repo.metadatacenter.org/template-instances/88f49c3d-9f82-4134-9237-ac6eb0dec69f",
        "schema:name": "Social Loss",
    },
    "Social approach": {
        "@id": "https://repo.metadatacenter.org/template-instances/0884af75-ed8c-4b61-aa15-67b41b93d834",
        "schema:name": "Social Approach",
    },
    # NOTE(review): this @id does not end in a well-formed UUID (it contains
    # "x4k") and otherwise matches the "Goal gain" identifier - confirm the
    # intended value before relying on it.
    "Goal better": {
        "@id": "https://repo.metadatacenter.org/template-instances/deb62b84-bd40-4729-84ca-bf099cc2c07x4k",
        "schema:name": "Goal Better",
    },
    "Goal gain": {
        "@id": "https://repo.metadatacenter.org/template-instances/deb62b84-bd40-4729-84ca-bf099cc2c07e",
        "schema:name": "Goal Gain",
    },
    # NOTE(review): placeholder identifier on example.com - replace once the
    # real template instance exists.
    "Goal worse": {
        "@id": "http://example.com/goal_worse_temporary",
        "schema:name": "Goal Worse",
    },
    "Goal loss": {
        "@id": "https://repo.metadatacenter.org/template-instances/d3159028-f58f-4bc2-bc44-8279263db82b",
        "schema:name": "Goal Loss",
    },
    "Goal approach": {
        "@id": "https://repo.metadatacenter.org/template-instances/66c607b6-800b-4071-8a0a-627110f698e2",
        "schema:name": "Goal Approach",
    },
    "Improving": {
        "@id": "https://repo.metadatacenter.org/template-instances/0b160448-c376-476d-b4a9-5e8a5496eaf0",
        "schema:name": "Improving",
    },
    "Worsening": {
        "@id": "https://repo.metadatacenter.org/template-instances/f20f522c-3c0d-4eea-b459-95b8404051d5",
        "schema:name": "Worsening",
    },
}

_DESIRED_INCREASE_URI = "http://purl.obolibrary.org/obo/PSDO_0000039"
_DESIRED_DECREASE_URI = "http://purl.obolibrary.org/obo/PSDO_0000042"

# rdfs:label attached to the direction entry appended to a message's is_about.
_DIRECTION_LABELS = {
    _DESIRED_INCREASE_URI: "desired increase",
    _DESIRED_DECREASE_URI: "desired decrease",
}


def _cell_text(row, column):
    """Return the stripped text of ``row[column]``, or "" if missing/empty."""
    value = row.get(column)
    # Empty Excel cells come back as NaN; str(NaN) would yield the text "nan"
    # and defeat every "is this cell empty?" check downstream.
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ""
    return str(value).strip()


def _direction_uri(direction):
    """Map a "Desired Direction" cell to its PSDO URI ("" if unrecognised)."""
    lowered = direction.strip().lower()
    if lowered.startswith('increas'):
        return _DESIRED_INCREASE_URI
    if lowered.startswith('decreas'):
        return _DESIRED_DECREASE_URI
    return ""


def _read_sheet(xls, sheet_name):
    """Read one sheet into a DataFrame; report the error and return None on failure."""
    try:
        return pd.read_excel(xls, sheet_name=sheet_name)
    except Exception as e:  # best-effort: a missing sheet must not stop the other one
        print(f"Error reading '{sheet_name}' sheet: {e}")
        return None


def _load_template(file_name):
    """Load a JSON template from the ``templates`` directory next to this script."""
    template_path = os.path.join(os.path.dirname(__file__), 'templates/' + file_name)
    with open(template_path, 'r', encoding="utf-8") as f:
        return json.load(f)


def _loads_lenient(text):
    """Parse ``text`` as JSON, retrying with single quotes swapped for double quotes.

    Returns None when neither form parses.
    """
    # Try the raw text first so valid JSON containing an apostrophe is not
    # corrupted by the quote replacement.
    for candidate in (text, text.replace("'", '"')):
        try:
            return json.loads(candidate)
        except ValueError:
            continue
    return None


def _parse_is_about(text):
    """Parse an ``is_about`` cell into a list of JSON objects (dicts).

    The cell may hold a JSON array, or one or more comma-separated objects
    written with either double or single quotes.
    """
    if not text:
        return []

    parsed = None
    if text.startswith('['):
        # Already an array: do not wrap it a second time.
        parsed = _loads_lenient(text)
    if not isinstance(parsed, list):
        parsed = _loads_lenient('[' + text + ']')

    if not isinstance(parsed, list):
        # Fallback: split on '},' and parse each object individually,
        # keeping whichever pieces are valid.
        parsed = []
        for part in text.split('},'):
            part = part.strip()
            if not part:
                continue
            if not part.endswith('}'):
                part += '}'
            obj = _loads_lenient(part)
            if obj is not None:
                parsed.append(obj)

    return [obj for obj in parsed if isinstance(obj, dict)]


def _safe_filename(name, fallback):
    """Return ``name`` with path separators neutralised, or ``fallback`` if empty."""
    cleaned = name.replace('/', '_').replace('\\', '_').strip()
    return cleaned or fallback


def _write_measures(performance_measures_df, kb_dir):
    """Build measures.json from the "Performance Measures" sheet and write it to ``kb_dir``."""
    measures_template = _load_template('measures.json')

    measures = []
    for _, row in performance_measures_df.iterrows():
        measure_id = _cell_text(row, 'Measure ID')
        measure_name = _cell_text(row, 'Measure Name')
        desired_direction = _cell_text(row, 'Desired Direction')
        # Rows missing any of the three required cells are skipped.
        if not measure_id or not measure_name or not desired_direction:
            continue
        measures.append({
            "@id": f"_:{measure_id}",
            "@type": "http://purl.obolibrary.org/obo/PSDO_0000102",
            "dc:title": measure_name,
            "identifier": measure_id,
            "has_desired_direction": _direction_uri(desired_direction),
        })

    measures_template["slowmo:IsAboutMeasure"] = measures

    output_path = os.path.join(kb_dir, "measures.json")
    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(measures_template, f, indent=2)
    print(f"Generated measures.json with {len(measures)} measures at {output_path}")


def _write_messages(message_templates_df, kb_dir):
    """Write one <message name>.json per usable row of the "Message Templates" sheet."""
    import uuid
    from datetime import datetime

    message_template = _load_template('message.json')

    # Timezone-aware local time; the UTC offset comes from the system rather
    # than being hard-coded.
    now_str = datetime.now().astimezone().isoformat(timespec="seconds")

    for idx, row in message_templates_df.iterrows():
        # Only rows whose is_about cell yields at least one object are processed.
        is_about_objs = _parse_is_about(_cell_text(row, 'is_about'))
        if not is_about_objs:
            continue

        direction_uri = _direction_uri(_cell_text(row, 'Desired Direction'))
        if direction_uri:
            is_about_objs.append({
                "@id": direction_uri,
                "rdfs:label": _DIRECTION_LABELS[direction_uri],
            })

        # JSON round trip = deep copy, so rows never share template state.
        msg_obj = json.loads(json.dumps(message_template))
        msg_obj['is_about'] = is_about_objs

        # has_causal_pathway from the Compatible Causal Pathway column.
        compatible_causal_pathway = _cell_text(row, 'Compatible Causal Pathway')
        if compatible_causal_pathway in _CAUSAL_PATHWAYS:
            msg_obj['has_causal_pathway'] = dict(_CAUSAL_PATHWAYS[compatible_causal_pathway])
        else:
            msg_obj['has_causal_pathway'] = {"@id": "", "schema:name": compatible_causal_pathway}

        msg_name = _cell_text(row, 'Message Template')
        msg_obj['Name'] = {"@value": msg_name}
        msg_obj['schema:name'] = msg_name

        msg_obj['Message text'] = [{"@value": _cell_text(row, 'Message Text')}]
        msg_obj['Default display'] = {"@value": _cell_text(row, 'Compatible Visualizations')}

        msg_obj['pav:createdOn'] = now_str
        msg_obj['pav:lastUpdatedOn'] = now_str

        # pav:createdBy, oslc:modifiedBy, Short message ID and Additional
        # message text are left exactly as they appear in the template.

        msg_obj['@id'] = f"https://repo.metadatacenter.org/template-instances/{uuid.uuid4()}"

        # The message name doubles as the file name; fall back to the row
        # index when it is empty and neutralise path separators.
        out_filename = f"{_safe_filename(msg_name, f'message_{idx}')}.json"
        out_path = os.path.join(kb_dir, out_filename)
        with open(out_path, 'w', encoding="utf-8") as f:
            json.dump(msg_obj, f, indent=2)
        print(f"Generated {out_filename} at {out_path}")


def main(argv=None):
    """Generate measures.json and per-message JSON files from the knowledge base workbook.

    Reads the "Performance Measures" and "Message Templates" sheets of the
    Excel file given by ``--knowledge_base_path`` (default ``./kb.xlsx``) and
    writes the generated JSON files into the directory containing that file.
    Each sheet is handled independently: a sheet that cannot be read is
    reported and skipped.

    Args:
        argv: Optional list of command-line arguments. When None, argparse
            reads ``sys.argv[1:]``.

    Returns:
        None. Errors opening the workbook are printed, not raised.
    """
    parser = argparse.ArgumentParser(description="Knowledge Base Generator")
    parser.add_argument('--knowledge_base_path', default="./kb.xlsx", type=str, help='Path to the knowledge base Excel file')
    args = parser.parse_args(argv)

    xlsx_path = args.knowledge_base_path
    try:
        xls = pd.ExcelFile(xlsx_path)
    except Exception as e:  # top-level boundary: report and stop
        print(f"Error reading Excel file: {e}")
        return

    # Close the workbook handle once both sheets are loaded.
    with xls:
        performance_measures_df = _read_sheet(xls, "Performance Measures")
        message_templates_df = _read_sheet(xls, "Message Templates")

    kb_dir = os.path.dirname(os.path.abspath(xlsx_path))

    if performance_measures_df is not None:
        _write_measures(performance_measures_df, kb_dir)

    if message_templates_df is not None:
        _write_messages(message_templates_df, kb_dir)


if __name__ == "__main__":
    main()

script/readme.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
This script can be used to generate the message and measure JSON files from data prepared in the kb.xlsx template file format.
2+

0 commit comments

Comments
 (0)