From b42ff50e49e1025b643e8d5e8e03399e0be90079 Mon Sep 17 00:00:00 2001
From: Clare72 <cp390@cam.ac.uk>
Date: Wed, 8 Apr 2026 07:00:03 +0100
Subject: [PATCH 01/12] add .idea to .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 3bf8cf0..375503c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ test_results.py
 .vscode/settings.json
 temp_examples_output.txt
 json_block_*.json
+.idea/
\ No newline at end of file

From 2bb3fa1a3f68e790eb4e1250c40ff34d9ae31146 Mon Sep 17 00:00:00 2001
From: Clare72 <cp390@cam.ac.uk>
Date: Mon, 13 Apr 2026 13:05:11 +0100
Subject: [PATCH 02/12] tests for connectivity queries - always expect data

---
 .../test_downstream_class_connectivity.py     | 120 +++++++++++
 src/test/test_neuron_neuron_connectivity.py   | 181 ++++++++++-------
 src/test/test_neuron_region_connectivity.py   | 192 ++++++++----------
 src/test/test_upstream_class_connectivity.py  | 120 +++++++++++
 4 files changed, 432 insertions(+), 181 deletions(-)
 create mode 100644 src/test/test_downstream_class_connectivity.py
 create mode 100644 src/test/test_upstream_class_connectivity.py

diff --git a/src/test/test_downstream_class_connectivity.py b/src/test/test_downstream_class_connectivity.py
new file mode 100644
index 0000000..2483046
--- /dev/null
+++ b/src/test/test_downstream_class_connectivity.py
@@ -0,0 +1,120 @@
+"""Tests for DownstreamClassConnectivity query.
+
+Tests the query that finds downstream partner neuron classes for a given
+neuron class, using the pre-indexed downstream_connectivity_query Solr field.
+"""
+
+import pytest
+import pandas as pd
+
+from vfbquery.vfb_queries import (
+    get_downstream_class_connectivity,
+    DownstreamClassConnectivity_to_schema,
+)
+
+# FBbt_00001482 = lineage NB3-2 primary interneuron — known to have
+# downstream_connectivity_query data in the vfb_json Solr core.
+TEST_CLASS = "FBbt_00001482"
+# A class assumed to have no downstream connectivity data; the empty-result tests rely on this.
+EMPTY_CLASS = "FBbt_00000001"
+
+
+class TestDownstreamClassConnectivityDict:
+    """Tests of the dict result (return_dataframe=False): 'count', 'rows' and 'headers'."""
+
+    @pytest.mark.integration
+    def test_returns_results(self):  # TEST_CLASS is expected to have indexed data
+        result = get_downstream_class_connectivity(
+            TEST_CLASS, return_dataframe=False, force_refresh=True
+        )
+        assert isinstance(result, dict)
+        assert result["count"] > 0
+        assert len(result["rows"]) > 0
+
+    @pytest.mark.integration
+    def test_row_has_expected_keys(self):  # subset check: extra keys are tolerated
+        result = get_downstream_class_connectivity(
+            TEST_CLASS, return_dataframe=False, limit=1, force_refresh=True
+        )
+        assert result["rows"], "Expected at least one row"
+        row = result["rows"][0]
+        expected_keys = {
+            "id", "downstream_class", "total_n", "connected_n",
+            "percent_connected", "pairwise_connections", "total_weight", "avg_weight",
+        }
+        assert expected_keys.issubset(row.keys())
+
+    @pytest.mark.integration
+    def test_headers_present(self):  # only the 'downstream_class' header is checked
+        result = get_downstream_class_connectivity(
+            TEST_CLASS, return_dataframe=False, limit=1, force_refresh=True
+        )
+        assert "headers" in result
+        assert "downstream_class" in result["headers"]
+
+    @pytest.mark.integration
+    def test_limit_respected(self):  # limit caps the rows, not the reported count
+        result = get_downstream_class_connectivity(
+            TEST_CLASS, return_dataframe=False, limit=3, force_refresh=True
+        )
+        assert len(result["rows"]) <= 3
+        # count is expected to be the total, so it may exceed the limited row set
+        assert result["count"] >= len(result["rows"])
+
+    @pytest.mark.integration
+    def test_empty_class_returns_zero(self):  # relies on EMPTY_CLASS having no data
+        result = get_downstream_class_connectivity(
+            EMPTY_CLASS, return_dataframe=False, force_refresh=True
+        )
+        assert result["count"] == 0
+        assert result["rows"] == []
+
+
+class TestDownstreamClassConnectivityDataFrame:
+    """Tests of the pandas DataFrame result (return_dataframe=True)."""
+
+    @pytest.mark.integration
+    def test_returns_dataframe(self):  # TEST_CLASS is expected to have indexed data
+        df = get_downstream_class_connectivity(
+            TEST_CLASS, return_dataframe=True, force_refresh=True
+        )
+        assert isinstance(df, pd.DataFrame)
+        assert not df.empty
+
+    @pytest.mark.integration
+    def test_dataframe_has_expected_columns(self):  # subset check: extra columns are tolerated
+        df = get_downstream_class_connectivity(
+            TEST_CLASS, return_dataframe=True, limit=1, force_refresh=True
+        )
+        expected_cols = {
+            "id", "downstream_class", "total_n", "connected_n",
+            "percent_connected", "pairwise_connections", "total_weight", "avg_weight",
+        }
+        assert expected_cols.issubset(set(df.columns))
+
+    @pytest.mark.integration
+    def test_limit_respected(self):  # limit caps the number of DataFrame rows
+        df = get_downstream_class_connectivity(
+            TEST_CLASS, return_dataframe=True, limit=5, force_refresh=True
+        )
+        assert len(df) <= 5
+
+    @pytest.mark.integration
+    def test_empty_class_returns_empty_dataframe(self):  # relies on EMPTY_CLASS having no data
+        df = get_downstream_class_connectivity(
+            EMPTY_CLASS, return_dataframe=True, force_refresh=True
+        )
+        assert isinstance(df, pd.DataFrame)
+        assert df.empty
+
+
+class TestDownstreamClassConnectivitySchema:  # schema construction only; not marked integration
+    def test_schema_generation(self):
+        schema = DownstreamClassConnectivity_to_schema(
+            "test neuron class", {"short_form": TEST_CLASS}
+        )
+        assert schema.query == "DownstreamClassConnectivity"
+        assert schema.function == "get_downstream_class_connectivity"
+        assert schema.preview == 5
+        assert "downstream_class" in schema.preview_columns  # membership only, order not checked
+        assert "percent_connected" in schema.preview_columns
diff --git a/src/test/test_neuron_neuron_connectivity.py b/src/test/test_neuron_neuron_connectivity.py
index aa6ff6e..b833b93 100644
--- a/src/test/test_neuron_neuron_connectivity.py
+++ b/src/test/test_neuron_neuron_connectivity.py
@@ -1,89 +1,118 @@
-#!/usr/bin/env python3
-"""
-Test suite for NeuronNeuronConnectivityQuery.
+"""Tests for NeuronNeuronConnectivityQuery.
 
 Tests the query that finds neurons connected to a given neuron.
 This implements the neuron_neuron_connectivity_query from the VFB XMI specification.
-
-Test cases:
-1. Query execution with known neuron
-2. Schema generation and validation
-3. Term info integration (if applicable)
-4. Preview results validation
 """
 
-import unittest
-import sys
-import os
-
-# Add the src directory to the path
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+import pytest
+import pandas as pd
 
 from vfbquery.vfb_queries import (
     get_neuron_neuron_connectivity,
     NeuronNeuronConnectivityQuery_to_schema,
-    get_term_info
 )
 
-class NeuronNeuronConnectivityTest(unittest.TestCase):
-    """Test suite for neuron_neuron_connectivity_query"""
-
-    def setUp(self):
-        """Set up test fixtures"""
-        # Test neuron: LPC1 (FlyEM-HB:1775513344) [VFB_jrchk00s]
-        self.test_neuron = "VFB_jrchk00s"
-
-    def test_query_execution(self):
-        """Test that the query executes successfully"""
-        print(f"\n=== Testing neuron_neuron_connectivity_query execution ===")
-        result = get_neuron_neuron_connectivity(self.test_neuron, return_dataframe=False, limit=5)
-        self.assertIsNotNone(result, "Query should return a result")
-        self.assertIsInstance(result, dict, "Result should be a dictionary")
-        print(f"Query returned {result.get('count', 0)} results")
-        if 'data' in result and len(result['data']) > 0:
-            first_result = result['data'][0]
-            self.assertIn('id', first_result, "Result should contain 'id' field")
-            self.assertIn('label', first_result, "Result should contain 'label' field")
-            print(f"First result: {first_result.get('label', 'N/A')} ({first_result.get('id', 'N/A')})")
-        else:
-            print("No connected neurons found (this is OK if none exist)")
+# VFB_jrchk00s = LPC1 (FlyEM-HB:1775513344) — known to have connectivity data.
+TEST_NEURON = "VFB_jrchk00s"
+
+
+class TestNeuronNeuronConnectivityDict:
+    """Tests of the dict result (return_dataframe=False): 'count', 'rows' and 'headers'."""
+
+    @pytest.mark.integration
+    def test_returns_results(self):  # TEST_NEURON is expected to have connectivity data
+        result = get_neuron_neuron_connectivity(
+            TEST_NEURON, return_dataframe=False
+        )
+        assert isinstance(result, dict)
+        assert result["count"] > 0
+        assert len(result["rows"]) > 0
+
+    @pytest.mark.integration
+    def test_row_has_expected_keys(self):  # subset check: extra keys are tolerated
+        result = get_neuron_neuron_connectivity(
+            TEST_NEURON, return_dataframe=False, limit=1
+        )
+        assert result["rows"], "Expected at least one row"
+        row = result["rows"][0]
+        expected_keys = {"id", "label", "outputs", "inputs", "tags"}
+        assert expected_keys.issubset(row.keys())
+
+    @pytest.mark.integration
+    def test_headers_present(self):  # 'label', 'outputs' and 'inputs' headers are checked
+        result = get_neuron_neuron_connectivity(
+            TEST_NEURON, return_dataframe=False, limit=1
+        )
+        assert "headers" in result
+        assert "label" in result["headers"]
+        assert "outputs" in result["headers"]
+        assert "inputs" in result["headers"]
+
+    @pytest.mark.integration
+    def test_limit_respected(self):  # limit caps the rows, not the reported count
+        result = get_neuron_neuron_connectivity(
+            TEST_NEURON, return_dataframe=False, limit=3
+        )
+        assert len(result["rows"]) <= 3
+        assert result["count"] >= len(result["rows"])
+
+    @pytest.mark.integration
+    def test_direction_upstream(self):  # filtered count is non-zero and never exceeds the unfiltered one
+        all_result = get_neuron_neuron_connectivity(
+            TEST_NEURON, return_dataframe=False
+        )
+        up_result = get_neuron_neuron_connectivity(
+            TEST_NEURON, return_dataframe=False, direction='upstream'
+        )
+        assert up_result["count"] > 0
+        assert up_result["count"] <= all_result["count"]
+
+    @pytest.mark.integration
+    def test_direction_downstream(self):  # same bounds as the upstream case
+        all_result = get_neuron_neuron_connectivity(
+            TEST_NEURON, return_dataframe=False
+        )
+        down_result = get_neuron_neuron_connectivity(
+            TEST_NEURON, return_dataframe=False, direction='downstream'
+        )
+        assert down_result["count"] > 0
+        assert down_result["count"] <= all_result["count"]
+
+
+class TestNeuronNeuronConnectivityDataFrame:
+    """Tests of the pandas DataFrame result (return_dataframe=True)."""
+
+    @pytest.mark.integration
+    def test_returns_dataframe(self):  # TEST_NEURON is expected to have connectivity data
+        df = get_neuron_neuron_connectivity(
+            TEST_NEURON, return_dataframe=True
+        )
+        assert isinstance(df, pd.DataFrame)
+        assert not df.empty
+
+    @pytest.mark.integration
+    def test_dataframe_has_expected_columns(self):  # subset check: extra columns are tolerated
+        df = get_neuron_neuron_connectivity(
+            TEST_NEURON, return_dataframe=True, limit=1
+        )
+        expected_cols = {"id", "label", "outputs", "inputs", "tags"}
+        assert expected_cols.issubset(set(df.columns))
+
+    @pytest.mark.integration
+    def test_limit_respected(self):  # limit caps the number of DataFrame rows
+        df = get_neuron_neuron_connectivity(
+            TEST_NEURON, return_dataframe=True, limit=5
+        )
+        assert len(df) <= 5
+
 
+class TestNeuronNeuronConnectivitySchema:
     def test_schema_generation(self):
-        """Test schema function generates correct structure"""
-        print(f"\n=== Testing neuron_neuron_connectivity_query schema generation ===")
-        test_name = "LPC1"
-        test_takes = {"short_form": self.test_neuron}
-        schema = NeuronNeuronConnectivityQuery_to_schema(test_name, test_takes)
-        self.assertIsNotNone(schema, "Schema should not be None")
-        self.assertEqual(schema.query, "NeuronNeuronConnectivityQuery", "Query name should match")
-        self.assertEqual(schema.label, f"Neurons connected to {test_name}", "Label should be formatted correctly")
-        self.assertEqual(schema.function, "get_neuron_neuron_connectivity", "Function name should match")
-        self.assertEqual(schema.preview, 5, "Preview should be 5")
-        expected_columns = ["id", "label", "outputs", "inputs", "tags"]
-        self.assertEqual(schema.preview_columns, expected_columns, f"Preview columns should be {expected_columns}")
-        print(f"Schema generated successfully: {schema.label}")
-
-    def test_preview_results(self):
-        """Test that preview results are properly formatted"""
-        print(f"\n=== Testing preview results ===")
-        result = get_neuron_neuron_connectivity(self.test_neuron, return_dataframe=False, limit=3)
-        self.assertIsNotNone(result, "Query should return a result")
-        if 'data' in result and len(result['data']) > 0:
-            first_result = result['data'][0]
-            self.assertIn('id', first_result, "Preview result should have 'id'")
-            self.assertIn('label', first_result, "Preview result should have 'label'")
-            print(f"First preview result: {first_result.get('label', 'N/A')}")
-        else:
-            print("No preview results available (this is OK if no connected neurons exist)")
-
-
-def run_tests():
-    """Run the test suite"""
-    suite = unittest.TestLoader().loadTestsFromTestCase(NeuronNeuronConnectivityTest)
-    runner = unittest.TextTestRunner(verbosity=2)
-    result = runner.run(suite)
-    return result.wasSuccessful()
-
-if __name__ == '__main__':
-    success = run_tests()
-    sys.exit(0 if success else 1)
+        schema = NeuronNeuronConnectivityQuery_to_schema(
+            "LPC1", {"short_form": TEST_NEURON}
+        )
+        assert schema.query == "NeuronNeuronConnectivityQuery"
+        assert schema.function == "get_neuron_neuron_connectivity"
+        assert schema.label == "Neurons connected to LPC1"  # label embeds the name passed above
+        assert schema.preview == 5
+        assert schema.preview_columns == ["id", "label", "outputs", "inputs", "tags"]  # exact order
diff --git a/src/test/test_neuron_region_connectivity.py b/src/test/test_neuron_region_connectivity.py
index 72f0efe..88c8b75 100644
--- a/src/test/test_neuron_region_connectivity.py
+++ b/src/test/test_neuron_region_connectivity.py
@@ -1,117 +1,99 @@
-#!/usr/bin/env python3
-"""
-Test suite for NeuronRegionConnectivityQuery.
+"""Tests for NeuronRegionConnectivityQuery.
 
-Tests the query that shows connectivity to regions from a given neuron.
+Tests the query that finds brain regions where a given neuron has synaptic terminals.
 This implements the neuron_region_connectivity_query from the VFB XMI specification.
-
-Test cases:
-1. Query execution with known neuron
-2. Schema generation and validation
-3. Term info integration (if applicable)
-4. Preview results validation
 """
 
-import unittest
-import sys
-import os
-
-# Add the src directory to the path
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+import pytest
+import pandas as pd
 
 from vfbquery.vfb_queries import (
     get_neuron_region_connectivity,
     NeuronRegionConnectivityQuery_to_schema,
-    get_term_info
+    get_term_info,
 )
 
-class NeuronRegionConnectivityTest(unittest.TestCase):
-    """Test suite for neuron_region_connectivity_query"""
-
-    def setUp(self):
-        """Set up test fixtures"""
-        # Test neuron: LPC1 (FlyEM-HB:1775513344) [VFB_jrchk00s]
-        self.test_neuron = "VFB_jrchk00s"
-
-    def test_query_execution(self):
-        """Test that the query executes successfully"""
-        print(f"\n=== Testing neuron_region_connectivity_query execution ===")
-        result = get_neuron_region_connectivity(self.test_neuron, return_dataframe=False, limit=5)
-        self.assertIsNotNone(result, "Query should return a result")
-        self.assertIsInstance(result, dict, "Result should be a dictionary")
-        print(f"Query returned {result.get('count', 0)} results")
-        if 'data' in result and len(result['data']) > 0:
-            first_result = result['data'][0]
-            self.assertIn('id', first_result, "Result should contain 'id' field")
-            self.assertIn('region', first_result, "Result should contain 'region' field")
-            self.assertIn('presynaptic_terminals', first_result, "Result should contain 'presynaptic_terminals' field")
-            self.assertIn('postsynaptic_terminals', first_result, "Result should contain 'postsynaptic_terminals' field")
-            print(f"First result: {first_result.get('region', 'N/A')} ({first_result.get('id', 'N/A')})")
-            print(f"  Pre: {first_result.get('presynaptic_terminals', 0)}, Post: {first_result.get('postsynaptic_terminals', 0)}")
-        else:
-            print("No regions with connectivity found (this is OK if none exist)")
+# VFB_jrchk00s = LPC1 (FlyEM-HB:1775513344) — known to have region connectivity data.
+TEST_NEURON = "VFB_jrchk00s"
+
+
+class TestNeuronRegionConnectivityDict:
+    """Tests of the dict result (return_dataframe=False): 'count', 'rows' and 'headers'."""
+
+    @pytest.mark.integration
+    def test_returns_results(self):  # TEST_NEURON is expected to have region connectivity data
+        result = get_neuron_region_connectivity(
+            TEST_NEURON, return_dataframe=False
+        )
+        assert isinstance(result, dict)
+        assert result["count"] > 0
+        assert len(result["rows"]) > 0
+
+    @pytest.mark.integration
+    def test_row_has_expected_keys(self):  # subset check: extra keys are tolerated
+        result = get_neuron_region_connectivity(
+            TEST_NEURON, return_dataframe=False, limit=1
+        )
+        assert result["rows"], "Expected at least one row"
+        row = result["rows"][0]
+        expected_keys = {"id", "region", "presynaptic_terminals", "postsynaptic_terminals", "tags"}
+        assert expected_keys.issubset(row.keys())
+
+    @pytest.mark.integration
+    def test_headers_present(self):  # region and both terminal-count headers are checked
+        result = get_neuron_region_connectivity(
+            TEST_NEURON, return_dataframe=False, limit=1
+        )
+        assert "headers" in result
+        assert "region" in result["headers"]
+        assert "presynaptic_terminals" in result["headers"]
+        assert "postsynaptic_terminals" in result["headers"]
 
+    @pytest.mark.integration
+    def test_limit_respected(self):  # limit caps the rows, not the reported count
+        result = get_neuron_region_connectivity(
+            TEST_NEURON, return_dataframe=False, limit=3
+        )
+        assert len(result["rows"]) <= 3
+        assert result["count"] >= len(result["rows"])
+
+
+class TestNeuronRegionConnectivityDataFrame:
+    """Tests of the pandas DataFrame result (return_dataframe=True)."""
+
+    @pytest.mark.integration
+    def test_returns_dataframe(self):  # TEST_NEURON is expected to have region connectivity data
+        df = get_neuron_region_connectivity(
+            TEST_NEURON, return_dataframe=True
+        )
+        assert isinstance(df, pd.DataFrame)
+        assert not df.empty
+
+    @pytest.mark.integration
+    def test_dataframe_has_expected_columns(self):  # subset check: extra columns are tolerated
+        df = get_neuron_region_connectivity(
+            TEST_NEURON, return_dataframe=True, limit=1
+        )
+        expected_cols = {"id", "region", "presynaptic_terminals", "postsynaptic_terminals", "tags"}
+        assert expected_cols.issubset(set(df.columns))
+
+    @pytest.mark.integration
+    def test_limit_respected(self):  # limit caps the number of DataFrame rows
+        df = get_neuron_region_connectivity(
+            TEST_NEURON, return_dataframe=True, limit=3
+        )
+        assert len(df) <= 3
+
+
+class TestNeuronRegionConnectivitySchema:
     def test_schema_generation(self):
-        """Test that the schema function works correctly"""
-        print(f"\n=== Testing NeuronRegionConnectivityQuery schema generation ===")
-        
-        # Get term info for the test neuron
-        term_info = get_term_info(self.test_neuron)
-        if term_info:
-            neuron_name = term_info.get('Name', self.test_neuron)
-        else:
-            neuron_name = self.test_neuron
-        
-        # Generate schema
-        schema = NeuronRegionConnectivityQuery_to_schema(neuron_name, self.test_neuron)
-        
-        # Validate schema structure
-        self.assertIsNotNone(schema, "Schema should not be None")
-        self.assertEqual(schema.query, "NeuronRegionConnectivityQuery", "Query name should match")
-        self.assertEqual(schema.function, "get_neuron_region_connectivity", "Function name should match")
-        self.assertEqual(schema.preview, 5, "Preview should show 5 results")
-        self.assertIn("region", schema.preview_columns, "Preview should include 'region' column")
-        self.assertIn("presynaptic_terminals", schema.preview_columns, "Preview should include 'presynaptic_terminals' column")
-        self.assertIn("postsynaptic_terminals", schema.preview_columns, "Preview should include 'postsynaptic_terminals' column")
-        
-        print(f"Schema label: {schema.label}")
-        print(f"Preview columns: {schema.preview_columns}")
-
-    def test_term_info_integration(self):
-        """Test that term info lookup works for the test neuron"""
-        print(f"\n=== Testing term_info integration ===")
-        term_info = get_term_info(self.test_neuron)
-        
-        self.assertIsNotNone(term_info, "Term info should not be None")
-        if term_info:
-            # get_term_info returns a dict with 'Name', 'Id', 'Tags', etc.
-            self.assertIn('Name', term_info, "Term info should contain 'Name'")
-            self.assertIn('Id', term_info, "Term info should contain 'Id'")
-            print(f"Neuron name: {term_info.get('Name', 'N/A')}")
-            print(f"Neuron tags: {term_info.get('Tags', [])}")
-        else:
-            print(f"Note: Term info not found for {self.test_neuron} (may not be in SOLR)")
-
-    def test_preview_validation(self):
-        """Test that preview results are properly formatted"""
-        print(f"\n=== Testing preview results ===")
-        result = get_neuron_region_connectivity(self.test_neuron, return_dataframe=False, limit=5)
-        
-        if 'data' in result and len(result['data']) > 0:
-            # Check that all preview columns exist in the results
-            expected_columns = ['id', 'region', 'presynaptic_terminals', 'postsynaptic_terminals', 'tags']
-            for item in result['data']:
-                for col in expected_columns:
-                    self.assertIn(col, item, f"Result should contain '{col}' field")
-            
-            print(f"✓ All {len(result['data'])} results have required preview columns")
-            
-            # Print sample results
-            for i, item in enumerate(result['data'][:3], 1):
-                print(f"{i}. {item.get('region', 'N/A')} - Pre:{item.get('presynaptic_terminals', 0)}, Post:{item.get('postsynaptic_terminals', 0)}")
-        else:
-            print("No preview data available (query returned no results)")
-
-
-if __name__ == '__main__':
-    unittest.main(verbosity=2)
+        # This test is not marked as integration, so it must not depend on
+        # get_term_info (a remote lookup). The name is never asserted; fix it.
+        neuron_name = "LPC1"
+        schema = NeuronRegionConnectivityQuery_to_schema(neuron_name, TEST_NEURON)
+        assert schema.query == "NeuronRegionConnectivityQuery"
+        assert schema.function == "get_neuron_region_connectivity"
+        assert schema.preview == 5
+        assert "region" in schema.preview_columns
+        assert "presynaptic_terminals" in schema.preview_columns
+        assert "postsynaptic_terminals" in schema.preview_columns
diff --git a/src/test/test_upstream_class_connectivity.py b/src/test/test_upstream_class_connectivity.py
new file mode 100644
index 0000000..ae59e9f
--- /dev/null
+++ b/src/test/test_upstream_class_connectivity.py
@@ -0,0 +1,120 @@
+"""Tests for UpstreamClassConnectivity query.
+
+Tests the query that finds upstream partner neuron classes for a given
+neuron class, using the pre-indexed upstream_connectivity_query Solr field.
+"""
+
+import pytest
+import pandas as pd
+
+from vfbquery.vfb_queries import (
+    get_upstream_class_connectivity,
+    UpstreamClassConnectivity_to_schema,
+)
+
+# FBbt_00001482 = lineage NB3-2 primary interneuron — known to have
+# upstream_connectivity_query data in the vfb_json Solr core.
+TEST_CLASS = "FBbt_00001482"
+# A class assumed to have no upstream connectivity data; the empty-result tests rely on this.
+EMPTY_CLASS = "FBbt_00000001"
+
+
+class TestUpstreamClassConnectivityDict:
+    """Tests of the dict result (return_dataframe=False): 'count', 'rows' and 'headers'."""
+
+    @pytest.mark.integration
+    def test_returns_results(self):  # TEST_CLASS is expected to have indexed data
+        result = get_upstream_class_connectivity(
+            TEST_CLASS, return_dataframe=False, force_refresh=True
+        )
+        assert isinstance(result, dict)
+        assert result["count"] > 0
+        assert len(result["rows"]) > 0
+
+    @pytest.mark.integration
+    def test_row_has_expected_keys(self):  # subset check: extra keys are tolerated
+        result = get_upstream_class_connectivity(
+            TEST_CLASS, return_dataframe=False, limit=1, force_refresh=True
+        )
+        assert result["rows"], "Expected at least one row"
+        row = result["rows"][0]
+        expected_keys = {
+            "id", "upstream_class", "total_n", "connected_n",
+            "percent_connected", "pairwise_connections", "total_weight", "avg_weight",
+        }
+        assert expected_keys.issubset(row.keys())
+
+    @pytest.mark.integration
+    def test_headers_present(self):  # only the 'upstream_class' header is checked
+        result = get_upstream_class_connectivity(
+            TEST_CLASS, return_dataframe=False, limit=1, force_refresh=True
+        )
+        assert "headers" in result
+        assert "upstream_class" in result["headers"]
+
+    @pytest.mark.integration
+    def test_limit_respected(self):  # limit caps the rows, not the reported count
+        result = get_upstream_class_connectivity(
+            TEST_CLASS, return_dataframe=False, limit=3, force_refresh=True
+        )
+        assert len(result["rows"]) <= 3
+        # count is expected to be the total, so it may exceed the limited row set
+        assert result["count"] >= len(result["rows"])
+
+    @pytest.mark.integration
+    def test_empty_class_returns_zero(self):  # relies on EMPTY_CLASS having no data
+        result = get_upstream_class_connectivity(
+            EMPTY_CLASS, return_dataframe=False, force_refresh=True
+        )
+        assert result["count"] == 0
+        assert result["rows"] == []
+
+
+class TestUpstreamClassConnectivityDataFrame:
+    """Tests of the pandas DataFrame result (return_dataframe=True)."""
+
+    @pytest.mark.integration
+    def test_returns_dataframe(self):  # TEST_CLASS is expected to have indexed data
+        df = get_upstream_class_connectivity(
+            TEST_CLASS, return_dataframe=True, force_refresh=True
+        )
+        assert isinstance(df, pd.DataFrame)
+        assert not df.empty
+
+    @pytest.mark.integration
+    def test_dataframe_has_expected_columns(self):  # subset check: extra columns are tolerated
+        df = get_upstream_class_connectivity(
+            TEST_CLASS, return_dataframe=True, limit=1, force_refresh=True
+        )
+        expected_cols = {
+            "id", "upstream_class", "total_n", "connected_n",
+            "percent_connected", "pairwise_connections", "total_weight", "avg_weight",
+        }
+        assert expected_cols.issubset(set(df.columns))
+
+    @pytest.mark.integration
+    def test_limit_respected(self):  # limit caps the number of DataFrame rows
+        df = get_upstream_class_connectivity(
+            TEST_CLASS, return_dataframe=True, limit=5, force_refresh=True
+        )
+        assert len(df) <= 5
+
+    @pytest.mark.integration
+    def test_empty_class_returns_empty_dataframe(self):  # relies on EMPTY_CLASS having no data
+        df = get_upstream_class_connectivity(
+            EMPTY_CLASS, return_dataframe=True, force_refresh=True
+        )
+        assert isinstance(df, pd.DataFrame)
+        assert df.empty
+
+
+class TestUpstreamClassConnectivitySchema:  # schema construction only; not marked integration
+    def test_schema_generation(self):
+        schema = UpstreamClassConnectivity_to_schema(
+            "test neuron class", {"short_form": TEST_CLASS}
+        )
+        assert schema.query == "UpstreamClassConnectivity"
+        assert schema.function == "get_upstream_class_connectivity"
+        assert schema.preview == 5
+        assert "upstream_class" in schema.preview_columns  # membership only, order not checked
+        assert "percent_connected" in schema.preview_columns

From 008685b44e16e9afee094e070b5d5341255bdd06 Mon Sep 17 00:00:00 2001
From: Clare72 <cp390@cam.ac.uk>
Date: Mon, 13 Apr 2026 17:05:53 +0100
Subject: [PATCH 03/12] code for building cell type and region hierarchies

---
 src/test/test_hierarchy.py    | 133 ++++++++++++
 src/vfbquery/ha_api.py        |  34 +++
 src/vfbquery/owlery_client.py |   2 +-
 src/vfbquery/vfb_queries.py   | 397 ++++++++++++++++++++++++++++++++++
 4 files changed, 565 insertions(+), 1 deletion(-)
 create mode 100644 src/test/test_hierarchy.py

diff --git a/src/test/test_hierarchy.py b/src/test/test_hierarchy.py
new file mode 100644
index 0000000..585dfc9
--- /dev/null
+++ b/src/test/test_hierarchy.py
@@ -0,0 +1,133 @@
+"""Tests for get_hierarchy function.
+
+Tests the hierarchy tree builder for both part_of (brain region structure)
+and subclass_of (cell type hierarchies), in both ancestor and descendant
+directions.
+"""
+
+import pytest
+
+from vfbquery.vfb_queries import get_hierarchy
+
+
+# Known test terms
+MUSHROOM_BODY = "FBbt_00005801"
+KENYON_CELL = "FBbt_00003686"
+
+
+class TestHierarchyValidation:  # argument validation; not marked as integration tests
+    def test_invalid_relationship_raises(self):
+        with pytest.raises(ValueError, match="relationship"):  # message must mention the argument
+            get_hierarchy(KENYON_CELL, relationship="invalid")
+
+    def test_invalid_direction_raises(self):
+        with pytest.raises(ValueError, match="direction"):  # message must mention the argument
+            get_hierarchy(KENYON_CELL, direction="invalid")
+
+
+class TestSubclassOfDescendants:  # subclass_of, walking down from KENYON_CELL
+    @pytest.mark.integration
+    def test_returns_descendants(self):  # root node echoes id, label and relationship
+        result = get_hierarchy(KENYON_CELL, 'subclass_of', 'descendants', max_depth=1)
+        assert result['id'] == KENYON_CELL
+        assert result['label'] == 'Kenyon cell'
+        assert result['relationship'] == 'subclass_of'
+        assert 'descendants' in result
+        assert len(result['descendants']) > 0
+
+    @pytest.mark.integration
+    def test_descendants_have_id_and_label(self):
+        result = get_hierarchy(KENYON_CELL, 'subclass_of', 'descendants', max_depth=1)
+        for child in result['descendants']:
+            assert 'id' in child
+            assert 'label' in child
+            assert child['id'].startswith('FBbt_')
+            assert child['label'] != child['id']  # label should be resolved
+
+    @pytest.mark.integration
+    def test_depth_1_has_no_grandchildren(self):  # at max_depth=1 children carry no 'descendants' key
+        result = get_hierarchy(KENYON_CELL, 'subclass_of', 'descendants', max_depth=1)
+        for child in result['descendants']:
+            assert 'descendants' not in child
+
+    @pytest.mark.integration
+    def test_depth_2_has_nested_children(self):  # at max_depth=2 some child nests its own list
+        result = get_hierarchy(KENYON_CELL, 'subclass_of', 'descendants', max_depth=2)
+        has_grandchildren = any('descendants' in child for child in result['descendants'])
+        assert has_grandchildren, "At least one direct subclass should have its own subclasses"
+
+
+class TestSubclassOfAncestors:  # subclass_of, walking up from KENYON_CELL
+    @pytest.mark.integration
+    def test_returns_ancestors(self):
+        result = get_hierarchy(KENYON_CELL, 'subclass_of', 'ancestors', max_depth=1)
+        assert 'ancestors' in result
+        assert len(result['ancestors']) > 0
+
+    @pytest.mark.integration
+    def test_ancestors_have_id_and_label(self):
+        result = get_hierarchy(KENYON_CELL, 'subclass_of', 'ancestors', max_depth=1)
+        for anc in result['ancestors']:
+            assert 'id' in anc
+            assert 'label' in anc
+
+    @pytest.mark.integration
+    def test_kenyon_cell_ancestor_is_mb_intrinsic_neuron(self):  # pins one known direct parent
+        result = get_hierarchy(KENYON_CELL, 'subclass_of', 'ancestors', max_depth=1)
+        ancestor_ids = [a['id'] for a in result['ancestors']]
+        assert 'FBbt_00007484' in ancestor_ids  # mushroom body intrinsic neuron
+
+    @pytest.mark.integration
+    def test_depth_2_has_nested_ancestors(self):  # at max_depth=2 some ancestor nests its own list
+        result = get_hierarchy(KENYON_CELL, 'subclass_of', 'ancestors', max_depth=2)
+        has_grandparent = any('ancestors' in anc for anc in result['ancestors'])
+        assert has_grandparent
+
+
+class TestPartOfDescendants:  # part_of, walking down from MUSHROOM_BODY
+    @pytest.mark.integration
+    def test_returns_parts(self):  # root node echoes id and label
+        result = get_hierarchy(MUSHROOM_BODY, 'part_of', 'descendants', max_depth=1)
+        assert result['id'] == MUSHROOM_BODY
+        assert result['label'] == 'mushroom body'
+        assert 'descendants' in result
+        assert len(result['descendants']) > 0
+
+    @pytest.mark.integration
+    def test_parts_have_id_and_label(self):
+        result = get_hierarchy(MUSHROOM_BODY, 'part_of', 'descendants', max_depth=1)
+        for part in result['descendants']:
+            assert 'id' in part
+            assert 'label' in part
+            assert part['id'].startswith('FBbt_')
+
+
+class TestPartOfAncestors:  # part_of, walking up from MUSHROOM_BODY
+    @pytest.mark.integration
+    def test_mushroom_body_part_of_protocerebrum(self):  # pins one known direct parent
+        result = get_hierarchy(MUSHROOM_BODY, 'part_of', 'ancestors', max_depth=1)
+        assert 'ancestors' in result
+        ancestor_ids = [a['id'] for a in result['ancestors']]
+        assert 'FBbt_00003627' in ancestor_ids  # protocerebrum
+
+
+class TestBothDirections:  # which of 'ancestors'/'descendants' appear for each direction value
+    @pytest.mark.integration
+    def test_both_returns_ancestors_and_descendants(self):  # 'both' yields two non-empty lists
+        result = get_hierarchy(KENYON_CELL, 'subclass_of', 'both', max_depth=1)
+        assert 'ancestors' in result
+        assert 'descendants' in result
+        assert len(result['ancestors']) > 0
+        assert len(result['descendants']) > 0
+
+    @pytest.mark.integration
+    def test_descendants_only_has_no_ancestors(self):  # the unrequested key is absent, not empty
+        result = get_hierarchy(KENYON_CELL, 'subclass_of', 'descendants', max_depth=1)
+        assert 'descendants' in result
+        assert 'ancestors' not in result
+
+    @pytest.mark.integration
+    def test_ancestors_only_has_no_descendants(self):  # the unrequested key is absent, not empty
+        result = get_hierarchy(KENYON_CELL, 'subclass_of', 'ancestors', max_depth=1)
+        assert 'ancestors' in result
+        assert 'descendants' not in result
diff --git a/src/vfbquery/ha_api.py b/src/vfbquery/ha_api.py
index 061ecdd..914c717 100644
--- a/src/vfbquery/ha_api.py
+++ b/src/vfbquery/ha_api.py
@@ -897,6 +897,39 @@ def post_fn(result):
     )
 
 
+def _run_get_hierarchy(short_form, relationship, direction, max_depth):
+    """Worker run in a subprocess: call get_hierarchy, then _convert_numpy_types on its result."""
+    from .vfb_queries import get_hierarchy as _get_hierarchy
+    hierarchy = _get_hierarchy(
+        short_form, relationship=relationship, direction=direction, max_depth=max_depth,
+    )
+    return _convert_numpy_types(hierarchy)
+
+
+async def handle_get_hierarchy(request):
+    """GET /get_hierarchy?id=FBbt_00005801&relationship=part_of&direction=both&max_depth=1"""
+    def _bad_request(message):  # every validation failure is answered as HTTP 400 JSON
+        return web.json_response({"error": message}, status=400)
+    short_form = request.query.get("id")
+    if not short_form:
+        return _bad_request("id parameter is required")
+    relationship = request.query.get("relationship", "part_of")
+    if relationship not in ("part_of", "subclass_of"):
+        return _bad_request("relationship must be 'part_of' or 'subclass_of'")
+    direction = request.query.get("direction", "both")
+    if direction not in ("descendants", "ancestors", "both"):
+        return _bad_request("direction must be 'descendants', 'ancestors', or 'both'")
+    try:  # int("abc") raises ValueError; answer 400 instead of an unhandled error
+        max_depth = int(request.query.get("max_depth", "1"))
+    except ValueError:
+        return _bad_request("max_depth must be an integer")
+    key = f"get_hierarchy:{short_form}:{relationship}:{direction}:{max_depth}"
+    return await _dispatch_to_pool(
+        request, key, _run_get_hierarchy,
+        short_form, relationship, direction, max_depth,
+    )
+
+
 # ---------------------------------------------------------------------------
 # Application factory
 # ---------------------------------------------------------------------------
@@ -937,6 +970,7 @@ def create_app(max_workers=None, max_concurrent=None, max_queue_depth=None,
     app.router.add_get("/find_combo_publications", handle_find_combo_publications)
     app.router.add_get("/list_connectome_datasets", handle_list_connectome_datasets)
     app.router.add_get("/query_connectivity", handle_query_connectivity)
+    app.router.add_get("/get_hierarchy", handle_get_hierarchy)
 
     # Store config for /status and handlers
     app["max_workers"] = max_workers
diff --git a/src/vfbquery/owlery_client.py b/src/vfbquery/owlery_client.py
index 895cd28..af12656 100644
--- a/src/vfbquery/owlery_client.py
+++ b/src/vfbquery/owlery_client.py
@@ -105,7 +105,7 @@ def convert_short_form_to_iri(match):
             # Based on VFBConnect's query() method
             params = {
                 'object': iri_query,
-                'direct': 'false',  # Always use indirect (transitive) queries
+                'direct': 'true' if direct else 'false',
                 'includeDeprecated': 'false',  # Exclude deprecated terms
                 'includeEquivalent': 'true'  # Include equivalent classes
             }
diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py
index 36b07ed..564c534 100644
--- a/src/vfbquery/vfb_queries.py
+++ b/src/vfbquery/vfb_queries.py
@@ -4538,3 +4538,400 @@ def process_query(query):
         process_query(query)
 
     return term_info
+
+
+def get_hierarchy(short_form, relationship='part_of', direction='both', max_depth=1):
+    """Build a hierarchy tree showing ancestors and/or descendants of a term.
+
+    For ``subclass_of`` descendants, all descendants are fetched in one Owlery
+    call (fast, cached) and the tree is reconstructed by looking up each term's
+    parents in SOLR.  For ``part_of``, descendants and ancestors are each
+    fetched with one Ubergraph SPARQL query and nested by most specific
+    parent, falling back to Owlery / SOLR term_info if that request fails.
+
+    :param short_form: Root term ID (e.g. 'FBbt_00005801')
+    :param relationship: 'part_of' for brain region structure, 'subclass_of' for cell type hierarchies
+    :param direction: 'descendants', 'ancestors', or 'both'
+    :param max_depth: Levels to expand (default 1 = direct only; -1 = unlimited)
+    :return: Nested dict with id, label, ancestors, descendants
+    """
+    if relationship not in ('part_of', 'subclass_of'):
+        raise ValueError("relationship must be 'part_of' or 'subclass_of'")
+    if direction not in ('descendants', 'ancestors', 'both'):
+        raise ValueError("direction must be 'descendants', 'ancestors', or 'both'")
+
+    label_cache = {}
+    _ont_solr = pysolr.Solr('https://solr.virtualflybrain.org/solr/ontology/', always_commit=False, timeout=30)
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    def _batch_lookup_labels(ids):
+        """Populate label_cache for any of ``ids`` not cached yet (one SOLR query).
+
+        Best effort: if the lookup raises, or a term is not returned, the
+        term's own ID is cached as its label.
+        """
+        uncached = [term for term in ids if term not in label_cache]
+        if uncached:
+            try:
+                terms_filter = '{!terms f=short_form}' + ','.join(uncached)
+                found = _ont_solr.search(
+                    q='*:*', fq=terms_filter, fl='short_form,label', rows=len(uncached))
+                for record in found.docs:
+                    sf = record.get('short_form', '')
+                    label_cache[sf] = record.get('label', sf)
+            except Exception:
+                pass
+            for term in uncached:
+                label_cache.setdefault(term, term)
+
+    def _get_all_children(term_id):
+        """Return the IDs from get_parts_of / get_subclasses_of for ``term_id``,
+        leaving out ``term_id`` itself and rows without an ID."""
+        fetch = get_parts_of if relationship == 'part_of' else get_subclasses_of
+        payload = fetch(term_id, return_dataframe=False)
+        rows = (payload.get('rows') if payload else None) or []
+        candidate_ids = (entry.get('id') for entry in rows)
+        # Keep real IDs only, and never list the term as its own descendant.
+        return [cid for cid in candidate_ids if cid and cid != term_id]
+
+    def _term_info_parents(term_id):
+        """Return [(parent_sf, parent_label), ...] for the direct parents of ``term_id``.
+
+        Read from SOLR term_info ('parents', or BFO_0000050 'relationships' for
+        part_of, with a Neo4j part_of lookup if none). Any failure yields [].
+        """
+        try:
+            results = vfb_solr.search(f'id:{term_id}', fl='term_info', rows=1)
+            if not results.docs or 'term_info' not in results.docs[0]:
+                return []
+            raw = results.docs[0]['term_info']
+            ti = json.loads(raw[0] if isinstance(raw, list) else raw)
+            if relationship == 'subclass_of':
+                return [(p['short_form'], p.get('label', p['short_form'])) for p in ti.get('parents', [])]
+            out = []
+            for r in ti.get('relationships', []):
+                if 'BFO_0000050' in r.get('relation', {}).get('iri', ''):
+                    obj = r['object']
+                    out.append((obj['short_form'], obj.get('label', obj['short_form'])))
+            if not out:
+                try:
+                    # term_id can come from a request: escape it so it stays inside the literal.
+                    safe_id = str(term_id).replace('\\', '\\\\').replace("'", "\\'")
+                    cypher = (f"MATCH (c:Class {{short_form: '{safe_id}'}})-[:part_of]->(p:Class) "
+                              f"RETURN p.short_form AS sf, p.label AS label")
+                    for row in get_dict_cursor()(vc.nc.commit_list([cypher])):
+                        out.append((row['sf'], row.get('label', row['sf'])))
+                except Exception:
+                    pass
+            return out
+        except Exception:
+            return []
+
+    # ------------------------------------------------------------------
+    # Descendants
+    # ------------------------------------------------------------------
+
+    def _build_descendants_subclass(root_id):
+        """Build the subclass descendant tree under ``root_id`` (list of nested nodes).
+
+        Descendant IDs come from _get_all_children; each term is nested under
+        those of its SOLR term_info parents that are in the tree, or under the
+        root when it has none.  Levels are rendered to max_depth (-1 = all).
+        """
+        all_desc = _get_all_children(root_id)
+        if not all_desc:
+            return []
+
+        tree_ids = set(all_desc) | {root_id}
+        _batch_lookup_labels(list(tree_ids))
+
+        # Batch-fetch parents from vfb_json SOLR
+        children_of = {tid: [] for tid in tree_ids}
+        try:
+            results = vfb_solr.search(
+                q='id:*', fq='{!terms f=id}' + ','.join(all_desc), fl='id,term_info', rows=len(all_desc)
+            )
+            for doc in results.docs:
+                if 'term_info' not in doc:
+                    continue
+                raw = doc['term_info']
+                ti = json.loads(raw[0] if isinstance(raw, list) else raw)
+                parents_in_tree = [p['short_form'] for p in ti.get('parents', []) if p['short_form'] in tree_ids]
+                for pid in parents_in_tree or [root_id]:
+                    children_of[pid].append(doc.get('id', ''))
+        except Exception:
+            # Drop any partial nesting first: otherwise terms already placed under
+            # a parent would also be listed under the root.
+            children_of = {tid: [] for tid in tree_ids}
+            children_of[root_id] = list(all_desc)
+
+        def build(node_id, depth):
+            node = {'id': node_id, 'label': label_cache.get(node_id, node_id)}
+            kids = children_of.get(node_id, [])
+            if kids and (max_depth == -1 or depth < max_depth):
+                kids = sorted(kids, key=lambda x: label_cache.get(x, x))
+                node['descendants'] = [build(k, depth + 1) for k in kids]
+            return node
+
+        top = sorted(children_of.get(root_id, []), key=lambda x: label_cache.get(x, x))
+        return [build(k, 1) for k in top]
+
+    def _build_descendants_part_of(root_id):
+        """Build the part_of descendant tree of ``root_id`` via Ubergraph SPARQL.
+
+        Fetches (child, parent) part_of pairs inside the subtree from the
+        redundant graph and nests each FBbt child under its most specific
+        parent(s).  If the request fails, returns a flat list of all parts.
+        """
+        import requests as _req
+        from collections import defaultdict
+
+        root_iri = _short_form_to_iri(root_id)
+        sparql = f'''
+PREFIX BFO: <http://purl.obolibrary.org/obo/BFO_>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+SELECT DISTINCT ?child ?childLabel ?parent ?parentLabel WHERE {{
+  GRAPH <http://reasoner.renci.org/redundant> {{
+    ?child BFO:0000050 <{root_iri}> .
+    ?child BFO:0000050 ?parent .
+  }}
+  FILTER(?parent != ?child)
+  FILTER(
+    ?parent = <{root_iri}> ||
+    EXISTS {{
+      GRAPH <http://reasoner.renci.org/redundant> {{
+        ?parent BFO:0000050 <{root_iri}> .
+      }}
+    }}
+  )
+  ?child rdfs:label ?childLabel .
+  ?parent rdfs:label ?parentLabel .
+  FILTER(STRSTARTS(STR(?child), "http://purl.obolibrary.org/obo/FBbt_"))
+}}
+'''
+        try:
+            resp = _req.get(
+                'https://ubergraph.apps.renci.org/sparql',
+                params={'query': sparql},
+                headers={'Accept': 'application/json'},
+                timeout=30,
+            )
+            resp.raise_for_status()
+            bindings = resp.json().get('results', {}).get('bindings', [])
+        except Exception:
+            # Request or JSON decoding failed: fall back to a flat (un-nested) list
+            all_parts = _get_all_children(root_id)
+            if not all_parts:
+                return []
+            _batch_lookup_labels(all_parts)
+            return [
+                {'id': pid, 'label': label_cache.get(pid, pid)}
+                for pid in sorted(all_parts, key=lambda x: label_cache.get(x, x))
+            ]
+
+        if not bindings:
+            return []
+
+        # Index the (child, parent) rows: child -> set of parents; labels are cached too
+        parents_of = defaultdict(set)
+        all_parts = set()
+        for b in bindings:
+            csf = b['child']['value'].rsplit('/', 1)[-1]
+            psf = b['parent']['value'].rsplit('/', 1)[-1]
+            parents_of[csf].add(psf)
+            label_cache[csf] = b['childLabel']['value']
+            label_cache[psf] = b['parentLabel']['value']
+            all_parts.add(csf)
+
+        # Keep only each child's most specific parent(s): a parent p is dropped
+        # when another parent q of the same child lists p among its own parents.
+        children_of = defaultdict(list)
+        for child in all_parts:
+            best = []
+            for p in parents_of[child]:
+                if not any(p in parents_of.get(q, set()) for q in parents_of[child] if q != p):
+                    best.append(p)
+            for bp in best:
+                children_of[bp].append(child)
+
+        def build(node_id, depth):
+            node = {'id': node_id, 'label': label_cache.get(node_id, node_id)}
+            if max_depth == -1 or depth < max_depth:
+                kids = children_of.get(node_id, [])
+                if kids:
+                    node['descendants'] = [
+                        build(k, depth + 1)
+                        for k in sorted(kids, key=lambda x: label_cache.get(x, x))
+                    ]
+            return node
+
+        top = children_of.get(root_id, [])
+        return [build(k, 1) for k in sorted(top, key=lambda x: label_cache.get(x, x))]
+
+    # ------------------------------------------------------------------
+    # Ancestors
+    # ------------------------------------------------------------------
+
+    def _build_ancestors_subclass(term_id, depth, visited):
+        """Build is-a ancestor chain from SOLR term_info parents."""
+        if term_id in visited or (max_depth != -1 and depth >= max_depth):
+            return []
+        visited.add(term_id)
+        parent_tuples = _term_info_parents(term_id)
+        if not parent_tuples:
+            return []
+        ancestors = []
+        for psf, plabel in parent_tuples:
+            label_cache[psf] = plabel
+            node = {'id': psf, 'label': plabel}
+            further = _build_ancestors_subclass(psf, depth + 1, visited)
+            if further:
+                node['ancestors'] = further
+            ancestors.append(node)
+        return ancestors
+
+    def _build_ancestors_part_of(term_id):
+        """Build part_of ancestor chain via Ubergraph SPARQL.
+
+        Filters ancestors to terms that are part of the nervous system
+        (or the nervous system itself) to exclude developmental lineage
+        terms and generic structural classes that leak in via is-a
+        propagation in the Ubergraph redundant graph.
+
+        The redundant graph returns every transitive ancestor at once, so the
+        levels are rebuilt locally: for each ancestor the query also returns
+        those of its own part_of parents that are ancestors of the term, and
+        at each level only the "most specific" candidates (those that are not
+        a parent of another candidate) are kept.
+
+        :param term_id: short form of the term whose ancestors are wanted
+        :return: list of nested {'id', 'label'[, 'ancestors']} nodes starting
+            at the term's direct parents; [] when the query returns no rows.
+            If the request fails, _build_ancestors_subclass is used instead.
+        """
+        import requests as _req
+        from collections import defaultdict
+
+        term_iri = _short_form_to_iri(term_id)
+        sparql = f'''
+PREFIX BFO: <http://purl.obolibrary.org/obo/BFO_>
+PREFIX FBbt: <http://purl.obolibrary.org/obo/FBbt_>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+SELECT DISTINCT ?ancestor ?ancestorLabel ?parent ?parentLabel WHERE {{
+  GRAPH <http://reasoner.renci.org/redundant> {{
+    <{term_iri}> BFO:0000050 ?ancestor .
+  }}
+  FILTER(?ancestor != <{term_iri}>)
+  FILTER(STRSTARTS(STR(?ancestor), "http://purl.obolibrary.org/obo/FBbt_"))
+  FILTER(
+    ?ancestor = FBbt:00005093 ||
+    EXISTS {{
+      GRAPH <http://reasoner.renci.org/redundant> {{
+        ?ancestor BFO:0000050 FBbt:00005093 .
+      }}
+    }}
+  )
+  ?ancestor rdfs:label ?ancestorLabel .
+  OPTIONAL {{
+    GRAPH <http://reasoner.renci.org/redundant> {{
+      ?ancestor BFO:0000050 ?parent .
+    }}
+    FILTER(
+      ?parent = FBbt:00005093 ||
+      EXISTS {{
+        GRAPH <http://reasoner.renci.org/redundant> {{
+          ?parent BFO:0000050 FBbt:00005093 .
+        }}
+      }}
+    )
+    FILTER(?parent != ?ancestor)
+    FILTER(STRSTARTS(STR(?parent), "http://purl.obolibrary.org/obo/FBbt_"))
+    FILTER(
+      EXISTS {{
+        GRAPH <http://reasoner.renci.org/redundant> {{
+          <{term_iri}> BFO:0000050 ?parent .
+        }}
+      }}
+    )
+    ?parent rdfs:label ?parentLabel .
+  }}
+}}
+'''
+        try:
+            resp = _req.get(
+                'https://ubergraph.apps.renci.org/sparql',
+                params={'query': sparql},
+                headers={'Accept': 'application/json'},
+                timeout=30,
+            )
+            resp.raise_for_status()
+            bindings = resp.json().get('results', {}).get('bindings', [])
+        except Exception:
+            # Fallback to term_info approach
+            return _build_ancestors_subclass(term_id, 0, set())
+
+        if not bindings:
+            return []
+
+        # Build parent map among ancestors.  'parent' is unbound in a row when
+        # the OPTIONAL part of the query found no qualifying parent.
+        parents_of = defaultdict(set)
+        all_ancestors = set()
+        for b in bindings:
+            asf = b['ancestor']['value'].rsplit('/', 1)[-1]
+            label_cache[asf] = b['ancestorLabel']['value']
+            all_ancestors.add(asf)
+            if 'parent' in b:
+                psf = b['parent']['value'].rsplit('/', 1)[-1]
+                parents_of[asf].add(psf)
+                label_cache[psf] = b['parentLabel']['value']
+
+        def most_specific(candidates):
+            """Drop every candidate that is itself a parent of another candidate."""
+            return [p for p in candidates
+                    if not any(p in parents_of.get(q, set()) for q in candidates if q != p)]
+
+        # Direct parents of the query term: ancestors that are not a parent of any other ancestor.
+        direct_parents = most_specific(all_ancestors)
+
+        def build(node_id, depth):
+            node = {'id': node_id, 'label': label_cache.get(node_id, node_id)}
+            if max_depth == -1 or depth < max_depth:
+                # Climb one level: this node's most specific parents among the ancestors.
+                best = most_specific([p for p in parents_of.get(node_id, set()) if p in all_ancestors])
+                if best:
+                    node['ancestors'] = [
+                        build(p, depth + 1)
+                        for p in sorted(best, key=lambda x: label_cache.get(x, x))
+                    ]
+            return node
+
+        return [build(dp, 1) for dp in sorted(direct_parents, key=lambda x: label_cache.get(x, x))]
+
+    # ------------------------------------------------------------------
+    # Assemble result
+    # ------------------------------------------------------------------
+
+    _batch_lookup_labels([short_form])
+    root = {
+        'id': short_form,
+        'label': label_cache.get(short_form, short_form),
+        'relationship': relationship,
+    }
+
+    if direction in ('descendants', 'both'):
+        if relationship == 'subclass_of':
+            root['descendants'] = _build_descendants_subclass(short_form)
+        else:
+            root['descendants'] = _build_descendants_part_of(short_form)
+
+    if direction in ('ancestors', 'both'):
+        if relationship == 'subclass_of':
+            root['ancestors'] = _build_ancestors_subclass(short_form, 0, set())
+        else:
+            root['ancestors'] = _build_ancestors_part_of(short_form)
+
+    return root

From 747412f1539e1a1e82c538d150b016be7ec0f1a9 Mon Sep 17 00:00:00 2001
From: Clare72 <cp390@cam.ac.uk>
Date: Mon, 13 Apr 2026 17:18:30 +0100
Subject: [PATCH 04/12] limit cell type ancestors

---
 src/vfbquery/vfb_queries.py | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py
index 564c534..942ce24 100644
--- a/src/vfbquery/vfb_queries.py
+++ b/src/vfbquery/vfb_queries.py
@@ -4776,15 +4776,38 @@ def build(node_id, depth):
     # ------------------------------------------------------------------
 
     def _build_ancestors_subclass(term_id, depth, visited):
-        """Build is-a ancestor chain from SOLR term_info parents."""
+        """Build is-a ancestor chain from SOLR term_info parents.
+
+        Filters to FBbt cell terms only (types includes 'Cell') to
+        exclude cross-ontology parents (CL, UBERON, BFO, etc.) and
+        non-cell ancestors (developmental lineage, anatomical structure).
+        Stops at 'cell' (FBbt_00007002).
+        """
         if term_id in visited or (max_depth != -1 and depth >= max_depth):
             return []
+        if term_id == 'FBbt_00007002':  # cell — top of useful hierarchy
+            return []
         visited.add(term_id)
-        parent_tuples = _term_info_parents(term_id)
-        if not parent_tuples:
+
+        try:
+            results = vfb_solr.search(f'id:{term_id}', fl='term_info', rows=1)
+            if not results.docs or 'term_info' not in results.docs[0]:
+                return []
+            raw = results.docs[0]['term_info']
+            ti = json.loads(raw[0] if isinstance(raw, list) else raw)
+            parents = ti.get('parents', [])
+        except Exception:
             return []
+
         ancestors = []
-        for psf, plabel in parent_tuples:
+        for p in parents:
+            psf = p['short_form']
+            # Filter: must be FBbt and must be a cell type
+            if not psf.startswith('FBbt_'):
+                continue
+            if 'Cell' not in p.get('types', []):
+                continue
+            plabel = p.get('label', psf)
             label_cache[psf] = plabel
             node = {'id': psf, 'label': plabel}
             further = _build_ancestors_subclass(psf, depth + 1, visited)

From 148ee9457c28320815b456a8d9e798034f4f678b Mon Sep 17 00:00:00 2001
From: Clare72 <cp390@cam.ac.uk>
Date: Mon, 13 Apr 2026 19:15:14 +0100
Subject: [PATCH 05/12] display of hierarchies

---
 src/test/test_hierarchy.py  |  33 +++++++
 src/vfbquery/ha_api.py      |  32 ++++++
 src/vfbquery/vfb_queries.py | 189 ++++++++++++++++++++++++++++++++++++
 3 files changed, 254 insertions(+)

diff --git a/src/test/test_hierarchy.py b/src/test/test_hierarchy.py
index 585dfc9..da66d25 100644
--- a/src/test/test_hierarchy.py
+++ b/src/test/test_hierarchy.py
@@ -111,6 +111,39 @@ def test_mushroom_body_part_of_protocerebrum(self):
         assert 'FBbt_00003627' in ancestor_ids  # protocerebrum
 
 
+class TestDisplayOutput:
+    @pytest.mark.integration
+    def test_display_field_present(self):
+        """The text rendering exists, is a string and names the queried term."""
+        tree = get_hierarchy(KENYON_CELL, 'subclass_of', 'both', max_depth=1)
+        text = tree.get('display')
+        assert isinstance(text, str) and 'Kenyon cell' in text
+
+    @pytest.mark.integration
+    def test_display_shows_ancestors(self):
+        text = get_hierarchy(KENYON_CELL, 'subclass_of', 'both', max_depth=1)['display']
+        assert 'ancestors' in text.lower()
+        assert 'mushroom body intrinsic neuron' in text
+
+    @pytest.mark.integration
+    def test_display_shows_tree_connectors(self):
+        text = get_hierarchy(KENYON_CELL, 'subclass_of', 'descendants', max_depth=1)['display']
+        assert any(connector in text for connector in ('├──', '└──'))
+
+    @pytest.mark.integration
+    def test_html_field_present(self):
+        tree = get_hierarchy(KENYON_CELL, 'subclass_of', 'both', max_depth=1)
+        assert 'html' in tree
+        page = tree['html']
+        assert '<!DOCTYPE html>' in page and 'Kenyon cell' in page
+
+    @pytest.mark.integration
+    def test_html_contains_vfb_links(self):
+        page = get_hierarchy(KENYON_CELL, 'subclass_of', 'descendants', max_depth=1)['html']
+        for expected in ('virtualflybrain.org', KENYON_CELL):
+            assert expected in page
+
+
 class TestBothDirections:
     @pytest.mark.integration
     def test_both_returns_ancestors_and_descendants(self):
diff --git a/src/vfbquery/ha_api.py b/src/vfbquery/ha_api.py
index 914c717..a091d6b 100644
--- a/src/vfbquery/ha_api.py
+++ b/src/vfbquery/ha_api.py
@@ -930,6 +930,37 @@ async def handle_get_hierarchy(request):
     )
 
 
+async def handle_get_hierarchy_html(request):
+    """GET /get_hierarchy_html?id=FBbt_00005801&relationship=part_of&direction=both&max_depth=1
+
+    Serves the hierarchy as a self-contained HTML page (Content-Type: text/html).
+    """
+    short_form = request.query.get("id")
+    if not short_form:
+        return web.Response(text="Error: id parameter is required", status=400)
+    relationship = request.query.get("relationship", "part_of")
+    if relationship not in ("part_of", "subclass_of"):
+        return web.Response(text="Error: relationship must be 'part_of' or 'subclass_of'", status=400)
+    direction = request.query.get("direction", "both")
+    if direction not in ("descendants", "ancestors", "both"):
+        return web.Response(text="Error: direction must be 'descendants', 'ancestors', or 'both'", status=400)
+    try:  # int("abc") raises ValueError; answer 400 instead of an unhandled error
+        max_depth = int(request.query.get("max_depth", "1"))
+    except ValueError:
+        return web.Response(text="Error: max_depth must be an integer", status=400)
+    key = f"get_hierarchy:{short_form}:{relationship}:{direction}:{max_depth}"
+    json_response = await _dispatch_to_pool(
+        request, key, _run_get_hierarchy, short_form, relationship, direction, max_depth,
+    )
+    if json_response.status != 200:  # NOTE(review): assumes pool failures carry a non-200 status
+        return json_response  # pass pool errors through rather than masking them as 404
+    import json as _json
+    html = _json.loads(json_response.body).get("html", "")
+    if not html:
+        return web.Response(text="No hierarchy data found", status=404)
+    return web.Response(text=html, content_type="text/html")
+
+
 # ---------------------------------------------------------------------------
 # Application factory
 # ---------------------------------------------------------------------------
@@ -971,6 +1002,7 @@ def create_app(max_workers=None, max_concurrent=None, max_queue_depth=None,
     app.router.add_get("/list_connectome_datasets", handle_list_connectome_datasets)
     app.router.add_get("/query_connectivity", handle_query_connectivity)
     app.router.add_get("/get_hierarchy", handle_get_hierarchy)
+    app.router.add_get("/get_hierarchy_html", handle_get_hierarchy_html)
 
     # Store config for /status and handlers
     app["max_workers"] = max_workers
diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py
index 942ce24..9449492 100644
--- a/src/vfbquery/vfb_queries.py
+++ b/src/vfbquery/vfb_queries.py
@@ -4957,4 +4957,193 @@ def build(node_id, depth):
         else:
             root['ancestors'] = _build_ancestors_part_of(short_form)
 
+    # ------------------------------------------------------------------
+    # Render display text and HTML
+    # ------------------------------------------------------------------
+
+    VFB_BASE = 'https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id='
+    DEFAULT_MAX_SIBLINGS = 10  # truncate large sibling groups in text display
+
+    def _text_tree(node, prefix='', is_last=True, is_root=True, max_siblings=DEFAULT_MAX_SIBLINGS):
+        """Return the text-tree lines for ``node`` and everything under 'descendants'.
+        Sibling groups larger than ``max_siblings`` (None = no limit) are cut short with a
+        '... (N more)' line followed by the group's last entry."""
+        heading = f'{node["label"]} ({node["id"]})'
+        if not is_root:
+            heading = prefix + ('└── ' if is_last else '├── ') + heading
+        rendered = [heading]
+        indent = prefix + ('    ' if is_last else '│   ')
+        kids = node.get('descendants', [])
+        truncate = max_siblings is not None and len(kids) > max_siblings
+        for position, kid in enumerate(kids):
+            if truncate and position == max_siblings - 2:
+                rendered.append(indent + f'├── ... ({len(kids) - max_siblings + 1} more)')
+                return rendered + _text_tree(kids[-1], indent, True, False, max_siblings)
+            rendered.extend(_text_tree(kid, indent, position == len(kids) - 1, False, max_siblings))
+        return rendered
+
+    def _invert_ancestor_tree(ancestors, leaf_node):
+        """Invert ancestor tree so highest-level terms are roots and the query term is a leaf.
+
+        ``ancestors`` is the nested list stored under root['ancestors']: every
+        node may carry its own 'ancestors' list, one level further up.  That
+        upward structure is flattened into a child -> parents map, which is
+        then read top-down.
+
+        Example, for query term T with a parent P and grandparent G::
+
+            ancestors = [{'id': 'P', 'label': 'p',
+                          'ancestors': [{'id': 'G', 'label': 'g'}]}]
+            leaf_node = {'id': 'T', 'label': 't'}
+
+        gives::
+
+            [{'id': 'G', 'label': 'g', 'descendants': [
+                {'id': 'P', 'label': 'p', 'descendants': [
+                    {'id': 'T', 'label': 't'}]}]}]
+
+        A term reachable through several parents is repeated under each one.
+
+        :param ancestors: list of {'id', 'label'[, 'ancestors']} nodes
+        :param leaf_node: {'id', 'label'} dict for the query term
+        :return: list of top-level nodes sorted by label, each with
+            'descendants' pointing downward toward the query term
+        """
+        # Collect all ancestor nodes into a flat list with their parent links
+        all_nodes = {}   # id -> {'id': ..., 'label': ...}
+        parent_map = {}  # child_id -> set of parent_ids
+
+        def _walk(level, child_id=None):
+            """Record every node of ``level`` and link it as a parent of ``child_id``."""
+            for a in level:
+                all_nodes[a['id']] = {'id': a['id'], 'label': a['label']}
+                if child_id:
+                    parent_map.setdefault(child_id, set()).add(a['id'])
+                if 'ancestors' in a and a['ancestors']:
+                    _walk(a['ancestors'], a['id'])
+
+        # Seed the walk with the query term so its direct ancestors are linked to it.
+        _walk(ancestors, leaf_node['id'])
+
+        # Roots are the IDs that occur as a parent but never as a child.
+        parent_ids = set().union(*parent_map.values())
+        root_ids = parent_ids - set(parent_map)
+        if not root_ids:
+            # Fallback: all direct ancestors are roots
+            root_ids = {a['id'] for a in ancestors}
+
+        # Add leaf node to all_nodes so its label is available
+        all_nodes[leaf_node['id']] = leaf_node
+
+        # Invert the map once (parent -> children) instead of scanning
+        # parent_map again for every node that is built.
+        children_of = {}
+        for cid, pids in parent_map.items():
+            for pid in pids:
+                children_of.setdefault(pid, []).append(cid)
+
+        def _label(node_id):
+            """Sort/display label of a node; its ID when no label is known."""
+            return all_nodes.get(node_id, {}).get('label', node_id)
+
+        def _build_down(node_id):
+            """Build the node for ``node_id`` with its children nested below it."""
+            node = {'id': node_id, 'label': _label(node_id)}
+            children_ids = children_of.get(node_id)
+            if children_ids:
+                node['descendants'] = [
+                    _build_down(cid)
+                    for cid in sorted(children_ids, key=_label)
+                ]
+            return node
+
+        # Build downward trees from each root
+        return [
+            _build_down(rid)
+            for rid in sorted(root_ids, key=_label)
+        ]
+
+    display_lines = []
+    if 'ancestors' in root and root['ancestors']:
+        rel_label = 'Part of' if relationship == 'part_of' else 'Is a'
+        display_lines.append(f'{rel_label} (ancestors):')
+        inverted = _invert_ancestor_tree(root['ancestors'], {'id': root['id'], 'label': root['label']})
+        for node in inverted:
+            display_lines.extend(_text_tree(node))
+        display_lines.append('')
+
+    if 'descendants' in root:
+        rel_label = 'Has parts' if relationship == 'part_of' else 'Subtypes'
+        display_lines.append(f'{rel_label} (descendants):')
+        display_lines.extend(_text_tree(root))
+
+    root['display'] = '\n'.join(display_lines)
+
+    # Full display (no sibling truncation)
+    full_lines = []
+    if 'ancestors' in root and root['ancestors']:
+        rel_label = 'Part of' if relationship == 'part_of' else 'Is a'
+        full_lines.append(f'{rel_label} (ancestors):')
+        inverted_full = _invert_ancestor_tree(root['ancestors'], {'id': root['id'], 'label': root['label']})
+        for node in inverted_full:
+            full_lines.extend(_text_tree(node, max_siblings=None))
+        full_lines.append('')
+
+    if 'descendants' in root:
+        rel_label = 'Has parts' if relationship == 'part_of' else 'Subtypes'
+        full_lines.append(f'{rel_label} (descendants):')
+        full_lines.extend(_text_tree(root, max_siblings=None))
+
+    root['display_full'] = '\n'.join(full_lines)
+
+    # HTML rendering
+    def _html_tree_nodes(node, depth=0, key='descendants'):
+        """Render node as a nested <li><details> item; id/label are HTML-escaped, depths 0-1 start open."""
+        from html import escape  # local import: labels may contain &, <, > or quotes
+        sid, label = escape(str(node['id'])), escape(str(node['label']))
+        link = f'<a href="{VFB_BASE}{sid}" target="_blank">{label}</a> <span class="id">({sid})</span>'
+        children = node.get(key, [])
+        if not children:
+            return f'<li><details class="leaf"><summary>{link}</summary></details></li>'
+        items = ''.join(_html_tree_nodes(c, depth + 1, key) for c in children)
+        return f'<li><details{"" if depth > 1 else " open"}><summary>{link}</summary><ul>{items}</ul></details></li>'
+
+    html_parts = [
+        '<!DOCTYPE html><html><head><meta charset="utf-8">',
+        f'<title>Hierarchy: {root["label"]}</title>',
+        '<style>',
+        'body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif; margin: 2em; max-width: 900px; line-height: 1.5; color: #24292e; }',
+        'h1 { font-size: 1.4em; border-bottom: 1px solid #e1e4e8; padding-bottom: .3em; }',
+        'h2 { font-size: 1.1em; margin-top: 1.5em; color: #586069; }',
+        'ul { list-style: none; padding-left: 1.5em; }',
+        'li { margin: .2em 0; }',
+        'details > summary { cursor: pointer; }',
+        'details > summary:hover { color: #0366d6; }',
+        'details.leaf > summary { list-style-type: "·  "; cursor: default; }',
+        'details.leaf > summary::-webkit-details-marker { display: none; }',
+        'a { color: #0366d6; text-decoration: none; }',
+        'a:hover { text-decoration: underline; }',
+        '.id { color: #6a737d; font-size: .85em; }',
+        '.path { background: #f6f8fa; padding: .8em 1em; border-radius: 6px; margin: 1em 0; font-size: .95em; }',
+        '.path a { font-weight: 500; }',
+        '</style></head><body>',
+        f'<h1>{root["label"]} <span class="id">({root["id"]})</span></h1>',
+    ]
+
+    if 'ancestors' in root and root['ancestors']:
+        rel_label = 'Part of' if relationship == 'part_of' else 'Is a'
+        html_parts.append(f'<h2>{rel_label} (ancestors)</h2>')
+        inverted_html = _invert_ancestor_tree(root['ancestors'], {'id': root['id'], 'label': root['label']})
+        items = ''.join(_html_tree_nodes(n) for n in inverted_html)
+        html_parts.append(f'<ul>{items}</ul>')
+
+    if 'descendants' in root and root['descendants']:
+        rel_label = 'Has parts' if relationship == 'part_of' else 'Subtypes'
+        html_parts.append(f'<h2>{rel_label} (descendants)</h2>')
+        root_node_html = _html_tree_nodes({'id': root['id'], 'label': root['label'], 'descendants': root['descendants']})
+        html_parts.append(f'<ul>{root_node_html}</ul>')
+
+    html_parts.append('</body></html>')
+    root['html'] = '\n'.join(html_parts)
+
     return root

From 780f0fdedc06a67ed6936d9796aff9c88588e6e4 Mon Sep 17 00:00:00 2001
From: Clare72 <cp390@cam.ac.uk>
Date: Mon, 13 Apr 2026 19:28:12 +0100
Subject: [PATCH 06/12] update count due to 2 genuine new refs

---
 src/test/term_info_queries_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test/term_info_queries_test.py b/src/test/term_info_queries_test.py
index b3e7849..f8a8313 100644
--- a/src/test/term_info_queries_test.py
+++ b/src/test/term_info_queries_test.py
@@ -299,7 +299,7 @@ def test_term_info_serialization_neuron_class2(self):
         self.assertFalse("thumbnail" in serialized)
 
         self.assertTrue("references" in serialized)
-        self.assertEqual(7, len(serialized["references"]))
+        self.assertEqual(9, len(serialized["references"]))
 
         self.assertTrue("targetingSplits" in serialized)
         self.assertEqual(6, len(serialized["targetingSplits"]))

From b2cd47773964429520a767b73959d1a35330b7f6 Mon Sep 17 00:00:00 2001
From: Clare72 <cp390@cam.ac.uk>
Date: Mon, 13 Apr 2026 19:35:55 +0100
Subject: [PATCH 07/12] fix workflow

---
 .github/workflows/performance-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/performance-test.yml b/.github/workflows/performance-test.yml
index fe48e0a..913b241 100644
--- a/.github/workflows/performance-test.yml
+++ b/.github/workflows/performance-test.yml
@@ -238,4 +238,4 @@ jobs:
         uses: ad-m/github-push-action@master
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          branch: ${{ github.ref }}
+          branch: ${{ github.head_ref || github.ref_name }}

From 89c79c69fda643fabfe9ce9c40755bc9807c394b Mon Sep 17 00:00:00 2001
From: Clare72 <cp390@cam.ac.uk>
Date: Mon, 13 Apr 2026 19:41:10 +0100
Subject: [PATCH 08/12] fix workflow

---
 .github/workflows/performance-test.yml | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/performance-test.yml b/.github/workflows/performance-test.yml
index 913b241..719eb05 100644
--- a/.github/workflows/performance-test.yml
+++ b/.github/workflows/performance-test.yml
@@ -225,17 +225,13 @@ jobs:
           echo "" >> $GITHUB_STEP_SUMMARY
           cat performance.md >> $GITHUB_STEP_SUMMARY
           
-      - name: Commit Performance Report
+      - name: Commit and Push Performance Report
         if: always()
         run: |
           git config --local user.email "action@github.com"
           git config --local user.name "GitHub Action"
           git add performance.md
           git diff --staged --quiet || git commit -m "Update performance test results [skip ci]"
-          
-      - name: Push Performance Report
-        if: always()
-        uses: ad-m/github-push-action@master
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-          branch: ${{ github.head_ref || github.ref_name }}
+          BRANCH="${{ github.head_ref || github.ref_name }}"
+          git pull --rebase origin "$BRANCH" || true
+          git push origin HEAD:"$BRANCH" || echo "Push failed — performance report not updated"

From 8baf56645aa4589581c99560ab92c3f787119abb Mon Sep 17 00:00:00 2001
From: Clare72 <cp390@cam.ac.uk>
Date: Mon, 13 Apr 2026 19:48:13 +0100
Subject: [PATCH 09/12] improve committing of performance.md

---
 .github/workflows/performance-test.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/performance-test.yml b/.github/workflows/performance-test.yml
index 719eb05..cf1ee70 100644
--- a/.github/workflows/performance-test.yml
+++ b/.github/workflows/performance-test.yml
@@ -230,8 +230,12 @@ jobs:
         run: |
           git config --local user.email "action@github.com"
           git config --local user.name "GitHub Action"
+          BRANCH="${{ github.head_ref || github.ref_name }}"
+          # Fetch latest branch, reset to it, then apply only performance.md
+          cp performance.md /tmp/performance.md
+          git fetch origin "$BRANCH"
+          git reset --hard "origin/$BRANCH"
+          cp /tmp/performance.md performance.md
           git add performance.md
           git diff --staged --quiet || git commit -m "Update performance test results [skip ci]"
-          BRANCH="${{ github.head_ref || github.ref_name }}"
-          git pull --rebase origin "$BRANCH" || true
           git push origin HEAD:"$BRANCH" || echo "Push failed — performance report not updated"

From b67e047705f0e7099e45c773a16c33722fe443a6 Mon Sep 17 00:00:00 2001
From: GitHub Action <action@github.com>
Date: Mon, 13 Apr 2026 18:51:14 +0000
Subject: [PATCH 10/12] Update performance test results [skip ci]

---
 performance.md | 97 +++++++++++++++++++++++++-------------------------
 1 file changed, 49 insertions(+), 48 deletions(-)

diff --git a/performance.md b/performance.md
index caa7c8c..e1dd987 100644
--- a/performance.md
+++ b/performance.md
@@ -1,9 +1,9 @@
 # VFBquery Performance Test Results
 
-**Test Date:** 2026-03-30 10:31:19 UTC
-**Git Commit:** 01b7f4af4c1f78b683a728124e9892e85732161e
-**Branch:** main
-**Workflow Run:** [23740149842](https://github.com/VirtualFlyBrain/VFBquery/actions/runs/23740149842)
+**Test Date:** 2026-04-13 18:51:13 UTC
+**Git Commit:** e6305c538e164f2573c4c204fd3457acbe4675d5
+**Branch:** 38/merge
+**Workflow Run:** [24360853454](https://github.com/VirtualFlyBrain/VFBquery/actions/runs/24360853454)
 
 ## Test Overview
 
@@ -122,10 +122,10 @@ Test dataset and template queries
 Traceback (most recent call last):
   File "/home/runner/work/VFBquery/VFBquery/src/test/test_query_performance.py", line 660, in test_13_dataset_template_queries
     self.assertLess(duration, self.THRESHOLD_MEDIUM, "AllAlignedImages exceeded threshold")
-AssertionError: 7.013141632080078 not less than 3.0 : AllAlignedImages exceeded threshold
+AssertionError: 4.026433706283569 not less than 3.0 : AllAlignedImages exceeded threshold
 
 ----------------------------------------------------------------------
-Ran 15 tests in 110.035s
+Ran 15 tests in 125.594s
 
 FAILED (failures=1)
 VFBquery functions patched with caching support
@@ -137,113 +137,114 @@ VFBquery: SOLR caching enabled by default (3-month TTL)
 ================================================================================
 TERM INFO QUERIES
 ================================================================================
-get_term_info (mushroom body): 2.6564s ✅
-get_term_info (individual): 2.6602s ✅
+get_term_info (mushroom body): 1.7197s ✅
+get_term_info (individual): 1.3654s ✅
 
 ================================================================================
 NEURON PART OVERLAP QUERIES
 ================================================================================
-NeuronsPartHere: 3.1075s ✅
+NeuronsPartHere: 1.6850s ✅
 
 ================================================================================
 SYNAPTIC TERMINAL QUERIES
 ================================================================================
-NeuronsSynaptic: 2.9505s ✅
-NeuronsPresynapticHere: 2.4136s ✅
-NeuronsPostsynapticHere: 2.1935s ✅
-NeuronNeuronConnectivity: 1.9000s ✅
+NeuronsSynaptic: 1.5972s ✅
+NeuronsPresynapticHere: 1.3124s ✅
+NeuronsPostsynapticHere: 1.6315s ✅
+NeuronNeuronConnectivity: 1.5685s ✅
 
 ================================================================================
 ANATOMICAL HIERARCHY QUERIES
 ================================================================================
-ComponentsOf: 1.9292s ✅
-PartsOf: 2.4126s ✅
-SubclassesOf: 1.8715s ✅
+ComponentsOf: 1.1740s ✅
+PartsOf: 1.3339s ✅
+SubclassesOf: 1.1715s ✅
 
 ================================================================================
 TRACT/NERVE AND LINEAGE QUERIES
 ================================================================================
-NeuronClassesFasciculatingHere: 1.8822s ✅
-TractsNervesInnervatingHere: 1.8852s ✅
-LineageClonesIn: 1.8848s ✅
+NeuronClassesFasciculatingHere: 1.5285s ✅
+TractsNervesInnervatingHere: 1.2776s ✅
+LineageClonesIn: 1.1748s ✅
 
 ================================================================================
 IMAGE AND DEVELOPMENTAL QUERIES
 ================================================================================
-ImagesNeurons: 3.1867s ✅
-ImagesThatDevelopFrom: 1.9679s ✅
-epFrag: 1.8675s ✅
+ImagesNeurons: 2.1157s ✅
+ImagesThatDevelopFrom: 1.2952s ✅
+epFrag: 1.1740s ✅
 
 ================================================================================
 INSTANCE QUERIES
 ================================================================================
-ListAllAvailableImages: 1.8908s ✅
+ListAllAvailableImages: 1.2677s ✅
 
 ================================================================================
 CONNECTIVITY QUERIES
 ================================================================================
-NeuronNeuronConnectivityQuery: 1.8734s ✅
-NeuronRegionConnectivityQuery: 1.9057s ✅
+NeuronNeuronConnectivityQuery: 1.2726s ✅
+NeuronRegionConnectivityQuery: 1.2568s ✅
 
 ================================================================================
 SIMILARITY QUERIES (Neo4j NBLAST)
 ================================================================================
-SimilarMorphologyTo: 0.9818s ✅
+SimilarMorphologyTo: 0.7672s ✅
 
 ================================================================================
 NEURON INPUT QUERIES (Neo4j)
 ================================================================================
-NeuronInputsTo: 3.3833s ✅
+NeuronInputsTo: 3.3149s ✅
 
 ================================================================================
 EXPRESSION PATTERN QUERIES (Neo4j)
 ================================================================================
-ExpressionOverlapsHere: 1.2898s ✅
+ExpressionOverlapsHere: 0.9710s ✅
   └─ Found 3922 total expression patterns, returned 10
 
 ================================================================================
 TRANSCRIPTOMICS QUERIES (Neo4j scRNAseq)
 ================================================================================
-anatScRNAseqQuery: 0.9500s ✅
+anatScRNAseqQuery: 0.5957s ✅
   └─ Found 57 total clusters, returned 10
-clusterExpression: 14.0053s ✅
+clusterExpression: 69.3918s ✅
   └─ Found 4588 genes expressed, returned 10
-expressionCluster: 0.9872s ✅
+clusterExpression: Skipped (test data may not exist): 69.39180970191956 not less than 15.0 : clusterExpression exceeded threshold
+expressionCluster: 0.7136s ✅
   └─ Found 9 clusters expressing gene
-scRNAdatasetData: 1.1084s ✅
+scRNAdatasetData: 0.6722s ✅
   └─ Found 13 clusters in dataset, returned 10
 
 ================================================================================
 NBLAST SIMILARITY QUERIES
 ================================================================================
-SimilarMorphologyTo: 7.5624s ✅
+SimilarMorphologyTo: 0.9145s ✅
   └─ Found 215 NBLAST matches, returned 10
-SimilarMorphologyToPartOf: 0.8351s ✅
+SimilarMorphologyToPartOf: 0.6419s ✅
   └─ Found 0 NBLASTexp matches
-SimilarMorphologyToPartOfexp: 0.7110s ✅
+SimilarMorphologyToPartOfexp: 0.4844s ✅
   └─ Found 0 reverse NBLASTexp matches
-SimilarMorphologyToNB: 0.8188s ✅
+SimilarMorphologyToNB: 0.5615s ✅
   └─ Found 15 NeuronBridge matches, returned 10
-SimilarMorphologyToNBexp: 1.1694s ✅
+SimilarMorphologyToNBexp: 0.5670s ✅
   └─ Found 15 NeuronBridge expression matches, returned 10
 ✅ All NBLAST similarity queries completed
 
 ================================================================================
 DATASET/TEMPLATE QUERIES
 ================================================================================
-PaintedDomains: 0.9323s ✅
+PaintedDomains: 0.7613s ✅
   └─ Found 46 painted domains, returned 10
-DatasetImages: 0.9288s ✅
+DatasetImages: 0.5175s ✅
   └─ Found 46 images in dataset, returned 10
-AllAlignedImages: 7.0131s ✅
+AllAlignedImages: 4.0264s ✅
   └─ Found 527179 aligned images, returned 10
 
 ================================================================================
 PUBLICATION/TRANSGENE QUERIES
 ================================================================================
-TermsForPub: 0.9332s ✅
+TermsForPub: 0.7443s ✅
   └─ Found 2 terms for publication
-TransgeneExpressionHere: 2.7297s ✅
+TransgeneExpressionHere: 1.3615s ✅
   └─ Found 2340 transgene expressions, returned 10
 ✅ All publication/transgene queries completed
 
@@ -256,7 +257,7 @@ test_term_info_performance (src.test.term_info_queries_test.TermInfoQueriesTest)
 Performance test for specific term info queries. ... ok
 
 ----------------------------------------------------------------------
-Ran 1 test in 3.804s
+Ran 1 test in 2.705s
 
 OK
 VFBquery functions patched with caching support
@@ -266,10 +267,10 @@ VFBquery: SOLR caching enabled by default (3-month TTL)
 ==================================================
 Performance Test Results:
 ==================================================
-FBbt_00003748 query took: 1.8841 seconds
-VFB_00101567 query took: 1.9189 seconds
-Total time for both queries: 3.8030 seconds
-Performance Level: 🟠 Acceptable (3-6 seconds)
+FBbt_00003748 query took: 1.4327 seconds
+VFB_00101567 query took: 1.2721 seconds
+Total time for both queries: 2.7048 seconds
+Performance Level: 🟡 Good (1.5-3 seconds)
 ==================================================
 Performance test completed successfully!
 ```
@@ -287,4 +288,4 @@ Track performance trends across commits:
 - [GitHub Actions History](https://github.com/VirtualFlyBrain/VFBquery/actions/workflows/performance-test.yml)
 
 ---
-*Last updated: 2026-03-30 10:31:19 UTC*
+*Last updated: 2026-04-13 18:51:13 UTC*

From e9a3099b20eb2b0e390bc0d477de70d3c452aad1 Mon Sep 17 00:00:00 2001
From: Clare72 <cp390@cam.ac.uk>
Date: Mon, 13 Apr 2026 19:59:37 +0100
Subject: [PATCH 11/12] Revert "Update performance test results [skip ci]"

This reverts commit b67e047705f0e7099e45c773a16c33722fe443a6.
---
 performance.md | 97 +++++++++++++++++++++++++-------------------------
 1 file changed, 48 insertions(+), 49 deletions(-)

diff --git a/performance.md b/performance.md
index e1dd987..caa7c8c 100644
--- a/performance.md
+++ b/performance.md
@@ -1,9 +1,9 @@
 # VFBquery Performance Test Results
 
-**Test Date:** 2026-04-13 18:51:13 UTC
-**Git Commit:** e6305c538e164f2573c4c204fd3457acbe4675d5
-**Branch:** 38/merge
-**Workflow Run:** [24360853454](https://github.com/VirtualFlyBrain/VFBquery/actions/runs/24360853454)
+**Test Date:** 2026-03-30 10:31:19 UTC
+**Git Commit:** 01b7f4af4c1f78b683a728124e9892e85732161e
+**Branch:** main
+**Workflow Run:** [23740149842](https://github.com/VirtualFlyBrain/VFBquery/actions/runs/23740149842)
 
 ## Test Overview
 
@@ -122,10 +122,10 @@ Test dataset and template queries
 Traceback (most recent call last):
   File "/home/runner/work/VFBquery/VFBquery/src/test/test_query_performance.py", line 660, in test_13_dataset_template_queries
     self.assertLess(duration, self.THRESHOLD_MEDIUM, "AllAlignedImages exceeded threshold")
-AssertionError: 4.026433706283569 not less than 3.0 : AllAlignedImages exceeded threshold
+AssertionError: 7.013141632080078 not less than 3.0 : AllAlignedImages exceeded threshold
 
 ----------------------------------------------------------------------
-Ran 15 tests in 125.594s
+Ran 15 tests in 110.035s
 
 FAILED (failures=1)
 VFBquery functions patched with caching support
@@ -137,114 +137,113 @@ VFBquery: SOLR caching enabled by default (3-month TTL)
 ================================================================================
 TERM INFO QUERIES
 ================================================================================
-get_term_info (mushroom body): 1.7197s ✅
-get_term_info (individual): 1.3654s ✅
+get_term_info (mushroom body): 2.6564s ✅
+get_term_info (individual): 2.6602s ✅
 
 ================================================================================
 NEURON PART OVERLAP QUERIES
 ================================================================================
-NeuronsPartHere: 1.6850s ✅
+NeuronsPartHere: 3.1075s ✅
 
 ================================================================================
 SYNAPTIC TERMINAL QUERIES
 ================================================================================
-NeuronsSynaptic: 1.5972s ✅
-NeuronsPresynapticHere: 1.3124s ✅
-NeuronsPostsynapticHere: 1.6315s ✅
-NeuronNeuronConnectivity: 1.5685s ✅
+NeuronsSynaptic: 2.9505s ✅
+NeuronsPresynapticHere: 2.4136s ✅
+NeuronsPostsynapticHere: 2.1935s ✅
+NeuronNeuronConnectivity: 1.9000s ✅
 
 ================================================================================
 ANATOMICAL HIERARCHY QUERIES
 ================================================================================
-ComponentsOf: 1.1740s ✅
-PartsOf: 1.3339s ✅
-SubclassesOf: 1.1715s ✅
+ComponentsOf: 1.9292s ✅
+PartsOf: 2.4126s ✅
+SubclassesOf: 1.8715s ✅
 
 ================================================================================
 TRACT/NERVE AND LINEAGE QUERIES
 ================================================================================
-NeuronClassesFasciculatingHere: 1.5285s ✅
-TractsNervesInnervatingHere: 1.2776s ✅
-LineageClonesIn: 1.1748s ✅
+NeuronClassesFasciculatingHere: 1.8822s ✅
+TractsNervesInnervatingHere: 1.8852s ✅
+LineageClonesIn: 1.8848s ✅
 
 ================================================================================
 IMAGE AND DEVELOPMENTAL QUERIES
 ================================================================================
-ImagesNeurons: 2.1157s ✅
-ImagesThatDevelopFrom: 1.2952s ✅
-epFrag: 1.1740s ✅
+ImagesNeurons: 3.1867s ✅
+ImagesThatDevelopFrom: 1.9679s ✅
+epFrag: 1.8675s ✅
 
 ================================================================================
 INSTANCE QUERIES
 ================================================================================
-ListAllAvailableImages: 1.2677s ✅
+ListAllAvailableImages: 1.8908s ✅
 
 ================================================================================
 CONNECTIVITY QUERIES
 ================================================================================
-NeuronNeuronConnectivityQuery: 1.2726s ✅
-NeuronRegionConnectivityQuery: 1.2568s ✅
+NeuronNeuronConnectivityQuery: 1.8734s ✅
+NeuronRegionConnectivityQuery: 1.9057s ✅
 
 ================================================================================
 SIMILARITY QUERIES (Neo4j NBLAST)
 ================================================================================
-SimilarMorphologyTo: 0.7672s ✅
+SimilarMorphologyTo: 0.9818s ✅
 
 ================================================================================
 NEURON INPUT QUERIES (Neo4j)
 ================================================================================
-NeuronInputsTo: 3.3149s ✅
+NeuronInputsTo: 3.3833s ✅
 
 ================================================================================
 EXPRESSION PATTERN QUERIES (Neo4j)
 ================================================================================
-ExpressionOverlapsHere: 0.9710s ✅
+ExpressionOverlapsHere: 1.2898s ✅
   └─ Found 3922 total expression patterns, returned 10
 
 ================================================================================
 TRANSCRIPTOMICS QUERIES (Neo4j scRNAseq)
 ================================================================================
-anatScRNAseqQuery: 0.5957s ✅
+anatScRNAseqQuery: 0.9500s ✅
   └─ Found 57 total clusters, returned 10
-clusterExpression: 69.3918s ✅
+clusterExpression: 14.0053s ✅
   └─ Found 4588 genes expressed, returned 10
-clusterExpression: Skipped (test data may not exist): 69.39180970191956 not less than 15.0 : clusterExpression exceeded threshold
-expressionCluster: 0.7136s ✅
+expressionCluster: 0.9872s ✅
   └─ Found 9 clusters expressing gene
-scRNAdatasetData: 0.6722s ✅
+scRNAdatasetData: 1.1084s ✅
   └─ Found 13 clusters in dataset, returned 10
 
 ================================================================================
 NBLAST SIMILARITY QUERIES
 ================================================================================
-SimilarMorphologyTo: 0.9145s ✅
+SimilarMorphologyTo: 7.5624s ✅
   └─ Found 215 NBLAST matches, returned 10
-SimilarMorphologyToPartOf: 0.6419s ✅
+SimilarMorphologyToPartOf: 0.8351s ✅
   └─ Found 0 NBLASTexp matches
-SimilarMorphologyToPartOfexp: 0.4844s ✅
+SimilarMorphologyToPartOfexp: 0.7110s ✅
   └─ Found 0 reverse NBLASTexp matches
-SimilarMorphologyToNB: 0.5615s ✅
+SimilarMorphologyToNB: 0.8188s ✅
   └─ Found 15 NeuronBridge matches, returned 10
-SimilarMorphologyToNBexp: 0.5670s ✅
+SimilarMorphologyToNBexp: 1.1694s ✅
   └─ Found 15 NeuronBridge expression matches, returned 10
 ✅ All NBLAST similarity queries completed
 
 ================================================================================
 DATASET/TEMPLATE QUERIES
 ================================================================================
-PaintedDomains: 0.7613s ✅
+PaintedDomains: 0.9323s ✅
   └─ Found 46 painted domains, returned 10
-DatasetImages: 0.5175s ✅
+DatasetImages: 0.9288s ✅
   └─ Found 46 images in dataset, returned 10
-AllAlignedImages: 4.0264s ✅
+AllAlignedImages: 7.0131s ✅
   └─ Found 527179 aligned images, returned 10
 
 ================================================================================
 PUBLICATION/TRANSGENE QUERIES
 ================================================================================
-TermsForPub: 0.7443s ✅
+TermsForPub: 0.9332s ✅
   └─ Found 2 terms for publication
-TransgeneExpressionHere: 1.3615s ✅
+TransgeneExpressionHere: 2.7297s ✅
   └─ Found 2340 transgene expressions, returned 10
 ✅ All publication/transgene queries completed
 
@@ -257,7 +256,7 @@ test_term_info_performance (src.test.term_info_queries_test.TermInfoQueriesTest)
 Performance test for specific term info queries. ... ok
 
 ----------------------------------------------------------------------
-Ran 1 test in 2.705s
+Ran 1 test in 3.804s
 
 OK
 VFBquery functions patched with caching support
@@ -267,10 +266,10 @@ VFBquery: SOLR caching enabled by default (3-month TTL)
 ==================================================
 Performance Test Results:
 ==================================================
-FBbt_00003748 query took: 1.4327 seconds
-VFB_00101567 query took: 1.2721 seconds
-Total time for both queries: 2.7048 seconds
-Performance Level: 🟡 Good (1.5-3 seconds)
+FBbt_00003748 query took: 1.8841 seconds
+VFB_00101567 query took: 1.9189 seconds
+Total time for both queries: 3.8030 seconds
+Performance Level: 🟠 Acceptable (3-6 seconds)
 ==================================================
 Performance test completed successfully!
 ```
@@ -288,4 +287,4 @@ Track performance trends across commits:
 - [GitHub Actions History](https://github.com/VirtualFlyBrain/VFBquery/actions/workflows/performance-test.yml)
 
 ---
-*Last updated: 2026-04-13 18:51:13 UTC*
+*Last updated: 2026-03-30 10:31:19 UTC*

From 69ca81bdbf1af7b32e5377847bca1696818e959a Mon Sep 17 00:00:00 2001
From: Clare72 <cp390@cam.ac.uk>
Date: Mon, 13 Apr 2026 20:01:24 +0100
Subject: [PATCH 12/12] only commit performance test if on main branch

---
 .github/workflows/performance-test.yml | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/performance-test.yml b/.github/workflows/performance-test.yml
index cf1ee70..1df4f2d 100644
--- a/.github/workflows/performance-test.yml
+++ b/.github/workflows/performance-test.yml
@@ -226,16 +226,10 @@ jobs:
           cat performance.md >> $GITHUB_STEP_SUMMARY
           
       - name: Commit and Push Performance Report
-        if: always()
+        if: always() && github.ref == 'refs/heads/main'
         run: |
           git config --local user.email "action@github.com"
           git config --local user.name "GitHub Action"
-          BRANCH="${{ github.head_ref || github.ref_name }}"
-          # Fetch latest branch, reset to it, then apply only performance.md
-          cp performance.md /tmp/performance.md
-          git fetch origin "$BRANCH"
-          git reset --hard "origin/$BRANCH"
-          cp /tmp/performance.md performance.md
           git add performance.md
           git diff --staged --quiet || git commit -m "Update performance test results [skip ci]"
-          git push origin HEAD:"$BRANCH" || echo "Push failed — performance report not updated"
+          git push origin HEAD:main