|
| 1 | + |
| 2 | +import argparse |
| 3 | +import pandas as pd |
| 4 | +import json |
| 5 | +import os |
| 6 | + |
# Causal pathway label (as written in the "Compatible Causal Pathway" column)
# mapped to the reference object stored under a message's "has_causal_pathway".
_CAUSAL_PATHWAYS = {
    "Social better": {
        "@id": "https://repo.metadatacenter.org/template-instances/8302e75d-ab21-4613-b94a-89c0eaf54c39",
        "schema:name": "Social Better",
    },
    "Social gain": {
        "@id": "https://repo.metadatacenter.org/template-instances/46d968d8-762f-4870-bb05-3321a9b69f14",
        "schema:name": "Social Gain",
    },
    "Social worse": {
        "@id": "https://repo.metadatacenter.org/template-instances/f4006042-5750-4f6c-a039-fe0a88466464",
        "schema:name": "Social Worse",
    },
    "Social loss": {
        "@id": "https://repo.metadatacenter.org/template-instances/88f49c3d-9f82-4134-9237-ac6eb0dec69f",
        "schema:name": "Social Loss",
    },
    "Social approach": {
        "@id": "https://repo.metadatacenter.org/template-instances/0884af75-ed8c-4b61-aa15-67b41b93d834",
        "schema:name": "Social Approach",
    },
    "Goal better": {
        # NOTE(review): the last group of this id ("bf099cc2c07x4k") is not a
        # valid UUID segment and otherwise duplicates the "Goal gain" id.
        # Kept byte-identical; confirm the real instance id.
        "@id": "https://repo.metadatacenter.org/template-instances/deb62b84-bd40-4729-84ca-bf099cc2c07x4k",
        "schema:name": "Goal Better",
    },
    "Goal gain": {
        "@id": "https://repo.metadatacenter.org/template-instances/deb62b84-bd40-4729-84ca-bf099cc2c07e",
        "schema:name": "Goal Gain",
    },
    "Goal worse": {
        # NOTE(review): placeholder id (example.com); confirm the real one.
        "@id": "http://example.com/goal_worse_temporary",
        "schema:name": "Goal Worse",
    },
    "Goal loss": {
        "@id": "https://repo.metadatacenter.org/template-instances/d3159028-f58f-4bc2-bc44-8279263db82b",
        "schema:name": "Goal Loss",
    },
    "Goal approach": {
        "@id": "https://repo.metadatacenter.org/template-instances/66c607b6-800b-4071-8a0a-627110f698e2",
        "schema:name": "Goal Approach",
    },
    "Improving": {
        "@id": "https://repo.metadatacenter.org/template-instances/0b160448-c376-476d-b4a9-5e8a5496eaf0",
        "schema:name": "Improving",
    },
    "Worsening": {
        "@id": "https://repo.metadatacenter.org/template-instances/f20f522c-3c0d-4eea-b459-95b8404051d5",
        "schema:name": "Worsening",
    },
}

_DESIRED_INCREASE_URI = "http://purl.obolibrary.org/obo/PSDO_0000039"
_DESIRED_DECREASE_URI = "http://purl.obolibrary.org/obo/PSDO_0000042"
_MEASURE_TYPE_URI = "http://purl.obolibrary.org/obo/PSDO_0000102"

# rdfs:label written next to each direction URI in a message's "is_about".
_DIRECTION_LABELS = {
    _DESIRED_INCREASE_URI: "desired increase",
    _DESIRED_DECREASE_URI: "desired decrease",
}


def _pathway_key(label: str) -> str:
    """Normalize a causal pathway label for case/whitespace-insensitive lookup."""
    return " ".join(label.split()).casefold()


_CAUSAL_PATHWAY_INDEX = {
    _pathway_key(name): ref for name, ref in _CAUSAL_PATHWAYS.items()
}


def _cell_text(row, column: str) -> str:
    """Return the cell of *row* at *column* as stripped text.

    Missing columns and blank cells yield "". pandas represents a blank cell
    as NaN, and ``str(nan)`` would otherwise produce the truthy text "nan".
    """
    value = row.get(column, "")
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ""
    return str(value).strip()


def _direction_uri(direction_text: str) -> str:
    """Map a "Desired Direction" cell to its PSDO URI, or "" if unrecognized.

    Matching is case-insensitive on the prefix ("increas…" / "decreas…") so
    that both sheets interpret the column the same way.
    """
    text = direction_text.strip().lower()
    if text.startswith("increas"):
        return _DESIRED_INCREASE_URI
    if text.startswith("decreas"):
        return _DESIRED_DECREASE_URI
    return ""


def _json_objects(parsed: list) -> list:
    """Flatten one level of nested lists and keep only JSON objects (dicts)."""
    items = []
    for entry in parsed:
        if isinstance(entry, list):
            items.extend(entry)
        else:
            items.append(entry)
    return [entry for entry in items if isinstance(entry, dict)]


def _parse_is_about(raw: str) -> list:
    """Parse an "is_about" cell into a list of dicts.

    The cell is expected to hold one or more comma-separated objects, written
    either as JSON or with single-quoted strings. Returns [] when nothing
    could be parsed.
    """
    if not raw:
        return []

    wrapped = f"[{raw}]"
    # Try the text verbatim first so apostrophes inside valid JSON strings
    # survive; only then fall back to treating single quotes as delimiters.
    for candidate in (wrapped, wrapped.replace("'", '"')):
        try:
            return _json_objects(json.loads(candidate))
        except json.JSONDecodeError:
            continue

    # Last resort: salvage whichever individual objects parse on their own.
    salvaged = []
    for part in raw.split("},"):
        part = part.strip()
        if not part.endswith("}"):
            part += "}"  # restore the brace consumed by the split
        for candidate in (part, part.replace("'", '"')):
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict):
                salvaged.append(parsed)
            break
    return salvaged


def _causal_pathway_ref(label: str) -> dict:
    """Return the reference object for a causal pathway label.

    Unknown labels produce an object with an empty "@id" and the label as
    "schema:name", so the message is still generated.
    """
    ref = _CAUSAL_PATHWAY_INDEX.get(_pathway_key(label))
    if ref is not None:
        return ref
    if label:
        print(f"Warning: unknown causal pathway '{label}'; writing empty @id")
    return {"@id": "", "schema:name": label}


def _safe_filename(name: str) -> str:
    """Replace path separators in *name* so it stays a single file name."""
    separators = {"/", os.sep}
    if os.altsep:
        separators.add(os.altsep)
    for separator in separators:
        name = name.replace(separator, "_")
    return name


def _load_template(filename: str):
    """Load a JSON template from the "templates" directory next to this file."""
    template_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "templates"
    )
    with open(os.path.join(template_dir, filename), "r", encoding="utf-8") as handle:
        return json.load(handle)


def _write_json(path: str, payload) -> None:
    """Write *payload* to *path* as indented JSON."""
    with open(path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)


def _read_sheet(workbook, sheet_name: str):
    """Read one sheet as a DataFrame; print the error and return None on failure."""
    try:
        return pd.read_excel(workbook, sheet_name=sheet_name)
    except Exception as exc:  # CLI boundary: report and carry on with other sheets
        print(f"Error reading '{sheet_name}' sheet: {exc}")
        return None


def _write_measures_file(measures_df, kb_dir: str) -> None:
    """Build measures.json in *kb_dir* from the "Performance Measures" sheet."""
    measures_doc = _load_template("measures.json")

    measures = []
    for _, row in measures_df.iterrows():
        measure_id = _cell_text(row, "Measure ID")
        measure_name = _cell_text(row, "Measure Name")
        direction = _cell_text(row, "Desired Direction")
        # Rows missing any of the three required cells are skipped.
        if not (measure_id and measure_name and direction):
            continue
        measures.append(
            {
                "@id": f"_:{measure_id}",
                "@type": _MEASURE_TYPE_URI,
                "dc:title": measure_name,
                "identifier": measure_id,
                "has_desired_direction": _direction_uri(direction),
            }
        )

    measures_doc["slowmo:IsAboutMeasure"] = measures

    output_path = os.path.join(kb_dir, "measures.json")
    _write_json(output_path, measures_doc)
    print(f"Generated measures.json with {len(measures)} measures at {output_path}")


def _write_message_files(templates_df, kb_dir: str) -> None:
    """Write one "<Message Template>.json" per usable "Message Templates" row."""
    import copy
    import uuid
    from datetime import datetime

    message_template = _load_template("message.json")

    # Local time stamped with the machine's actual UTC offset rather than a
    # hard-coded "-08:00".
    timestamp = datetime.now().astimezone().isoformat(timespec="seconds")

    for idx, row in templates_df.iterrows():
        # Only rows whose is_about cell yields at least one object are used.
        is_about = _parse_is_about(_cell_text(row, "is_about"))
        if not is_about:
            continue

        msg_name = _cell_text(row, "Message Template")
        if not msg_name:
            # The name is also the output file name; without it rows would
            # overwrite one another.
            print(f"Skipping 'Message Templates' row {idx}: no 'Message Template' name")
            continue

        direction_uri = _direction_uri(_cell_text(row, "Desired Direction"))
        if direction_uri:
            is_about.append(
                {"@id": direction_uri, "rdfs:label": _DIRECTION_LABELS[direction_uri]}
            )

        msg_obj = copy.deepcopy(message_template)
        msg_obj["is_about"] = is_about
        msg_obj["has_causal_pathway"] = _causal_pathway_ref(
            _cell_text(row, "Compatible Causal Pathway")
        )
        msg_obj["Name"] = {"@value": msg_name}
        msg_obj["schema:name"] = msg_name
        msg_obj["Message text"] = [{"@value": _cell_text(row, "Message Text")}]
        msg_obj["Default display"] = {
            "@value": _cell_text(row, "Compatible Visualizations")
        }
        msg_obj["pav:createdOn"] = timestamp
        msg_obj["pav:lastUpdatedOn"] = timestamp
        # pav:createdBy, oslc:modifiedBy, the short message id and the
        # additional message text are left as they are in the template.
        msg_obj["@id"] = (
            f"https://repo.metadatacenter.org/template-instances/{uuid.uuid4()}"
        )

        out_filename = f"{_safe_filename(msg_name)}.json"
        out_path = os.path.join(kb_dir, out_filename)
        _write_json(out_path, msg_obj)
        print(f"Generated {out_filename} at {out_path}")


def main():
    """Generate knowledge-base JSON files from an Excel workbook.

    Command line:
        --knowledge_base_path  Path to the workbook (default "./kb.xlsx").

    Reads the "Performance Measures" and "Message Templates" sheets and
    writes measures.json plus one JSON file per message template into the
    workbook's directory. A workbook or sheet that cannot be read is reported
    on stdout and skipped; the function then returns normally.

    Raises:
        OSError, json.JSONDecodeError: if a bundled template under
            "templates/" is missing or is not valid JSON.
    """
    parser = argparse.ArgumentParser(description="Knowledge Base Generator")
    parser.add_argument(
        '--knowledge_base_path',
        default="./kb.xlsx",
        type=str,
        help='Path to the knowledge base Excel file',
    )
    args = parser.parse_args()

    xlsx_path = args.knowledge_base_path
    try:
        workbook = pd.ExcelFile(xlsx_path)
    except Exception as exc:  # CLI boundary: missing file, bad format, no engine
        print(f"Error reading Excel file: {exc}")
        return

    # Close the workbook handle as soon as both sheets are loaded.
    with workbook:
        performance_measures_df = _read_sheet(workbook, "Performance Measures")
        message_templates_df = _read_sheet(workbook, "Message Templates")

    kb_dir = os.path.dirname(os.path.abspath(xlsx_path))

    if performance_measures_df is not None:
        _write_measures_file(performance_measures_df, kb_dir)

    if message_templates_df is not None:
        _write_message_files(message_templates_df, kb_dir)
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments