-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest.py
More file actions
10 lines (10 loc) · 2.93 KB
/
test.py
File metadata and controls
10 lines (10 loc) · 2.93 KB
1
2
3
4
5
6
7
8
9
10
[
{
"role": "system",
"content": "\nYou are an experienced data analyst that can annotate datasets. Your instructions are as follows:\ni) ALWAYS generate the name of the dataset and the dataset_description\nii) ALWAYS generate a field description.\niii.) ALWAYS generate a semantic_type (a single word) for each field given its values e.g. company, city, number, supplier, location, gender, longitude, latitude, url, ip address, zip code, email, etc\nYou must return an updated JSON dictionary without any preamble or explanation.\n",
},
{
"role": "assistant",
"content": "\n Annotate the dictionary below. Only return a JSON object.\n {'name': 'healthcare-dataset-stroke-data.csv', 'file_name': 'healthcare-dataset-stroke-data.csv', 'dataset_description': '', 'fields': [{'column': 'id', 'properties': {'dtype': 'number', 'std': 21078, 'min': 77, 'max': 72918, 'samples': [11960, 23094, 64931], 'num_unique_values': 4500, 'semantic_type': '', 'description': ''}}, {'column': 'gender', 'properties': {'dtype': 'category', 'samples': ['Female', 'Male', 'Other'], 'num_unique_values': 3, 'semantic_type': '', 'description': ''}}, {'column': 'age', 'properties': {'dtype': 'number', 'std': 22.561684025255936, 'min': 0.08, 'max': 82.0, 'samples': [47.0, 65.0, 82.0], 'num_unique_values': 104, 'semantic_type': '', 'description': ''}}, {'column': 'hypertension', 'properties': {'dtype': 'number', 'std': 0, 'min': 0, 'max': 1, 'samples': [1, 0], 'num_unique_values': 2, 'semantic_type': '', 'description': ''}}, {'column': 'heart_disease', 'properties': {'dtype': 'number', 'std': 0, 'min': 0, 'max': 1, 'samples': [1, 0], 'num_unique_values': 2, 'semantic_type': '', 'description': ''}}, {'column': 'ever_married', 'properties': {'dtype': 'category', 'samples': ['Yes', 'No'], 'num_unique_values': 2, 'semantic_type': '', 'description': ''}}, {'column': 'work_type', 'properties': {'dtype': 'category', 'samples': ['Govt_job', 'Never_worked'], 'num_unique_values': 5, 'semantic_type': '', 'description': ''}}, {'column': 'Residence_type', 'properties': {'dtype': 'category', 'samples': ['Urban', 'Rural'], 'num_unique_values': 2, 'semantic_type': '', 'description': ''}}, {'column': 'avg_glucose_level', 'properties': {'dtype': 'number', 'std': 45.18319936102145, 'min': 55.12, 'max': 271.74, 'samples': [69.34, 61.98], 'num_unique_values': 3595, 'semantic_type': '', 'description': ''}}, {'column': 'bmi', 'properties': {'dtype': 'number', 'std': 7.942775558966113, 'min': 10.3, 'max': 97.6, 'samples': [15.5, 34.7], 'num_unique_values': 411, 'semantic_type': '', 'description': ''}}, {'column': 'smoking_status', 'properties': {'dtype': 'category', 'samples': ['smokes', 'Unknown'], 'num_unique_values': 4, 'semantic_type': '', 'description': ''}}, {'column': 'stroke', 'properties': {'dtype': 'number', 'std': 0, 'min': 0, 'max': 1, 'samples': [1, 0], 'num_unique_values': 2, 'semantic_type': '', 'description': ''}}]}\n ",
},
]